Mirror of https://github.com/BerriAI/litellm.git — synced 2025-04-25 02:34:29 +00:00

Merge branch 'main' into litellm_dev_04_16_2025_p2

This commit is contained in: commit c0e5586137

222 changed files with 6791 additions and 3471 deletions
1  .gitignore (vendored)

@ -86,3 +86,4 @@ litellm/proxy/db/migrations/0_init/migration.sql
litellm/proxy/db/migrations/*
litellm/proxy/migrations/*config.yaml
litellm/proxy/migrations/*
tests/litellm/litellm_core_utils/llm_cost_calc/log.txt
@ -13,6 +13,15 @@ Pass-through endpoints for Vertex AI - call provider-specific endpoint, in native format

| End-user Tracking | ❌ | [Tell us if you need this](https://github.com/BerriAI/litellm/issues/new) |
| Streaming | ✅ | |

## Supported Endpoints

LiteLLM supports 2 Vertex AI pass-through routes:

1. `/vertex_ai` → routes to `https://{vertex_location}-aiplatform.googleapis.com/`
2. `/vertex_ai/discovery` → routes to [`https://discoveryengine.googleapis.com`](https://discoveryengine.googleapis.com/)

## How to use

Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE_URL/vertex_ai`
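As a quick illustration of that base-URL swap, a minimal sketch with plain `requests` — the project, region, model, and proxy key below are placeholders, not values from this diff:

```python
import requests

LITELLM_PROXY_BASE_URL = "http://localhost:4000"

# Native Vertex AI path, with only the host swapped for the LiteLLM pass-through route
url = (
    f"{LITELLM_PROXY_BASE_URL}/vertex_ai/v1/projects/my-project/locations/us-central1/"
    "publishers/google/models/gemini-2.0-flash:generateContent"
)

resp = requests.post(
    url,
    headers={"Authorization": "Bearer sk-1234"},  # LiteLLM virtual key (placeholder)
    json={"contents": [{"role": "user", "parts": [{"text": "hello"}]}]},
)
print(resp.json())
```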
LiteLLM supports 3 flows for calling Vertex AI endpoints via pass-through:
@ -1002,9 +1002,127 @@ Expected Response:

```

## **Azure Responses API**

| Property | Details |
|-------|-------|
| Description | Azure OpenAI Responses API |
| `custom_llm_provider` on LiteLLM | `azure/` |
| Supported Operations | `/v1/responses`|
| Azure OpenAI Responses API | [Azure OpenAI Responses API ↗](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/responses?tabs=python-secure) |
| Cost Tracking, Logging Support | ✅ LiteLLM will log and track cost for Responses API requests |

## Usage

## Create a model response

<Tabs>
<TabItem value="litellm-sdk" label="LiteLLM SDK">

#### Non-streaming

```python showLineNumbers title="Azure Responses API"
import os

import litellm

# Non-streaming response
response = litellm.responses(
    model="azure/o1-pro",
    input="Tell me a three sentence bedtime story about a unicorn.",
    max_output_tokens=100,
    api_key=os.getenv("AZURE_RESPONSES_OPENAI_API_KEY"),
    api_base="https://litellm8397336933.openai.azure.com/",
    api_version="2023-03-15-preview",
)

print(response)
```

#### Streaming
```python showLineNumbers title="Azure Responses API"
import os

import litellm

# Streaming response
response = litellm.responses(
    model="azure/o1-pro",
    input="Tell me a three sentence bedtime story about a unicorn.",
    stream=True,
    api_key=os.getenv("AZURE_RESPONSES_OPENAI_API_KEY"),
    api_base="https://litellm8397336933.openai.azure.com/",
    api_version="2023-03-15-preview",
)

for event in response:
    print(event)
```

</TabItem>
<TabItem value="proxy" label="OpenAI SDK with LiteLLM Proxy">

First, add this to your litellm proxy config.yaml:
```yaml showLineNumbers title="Azure Responses API"
model_list:
  - model_name: o1-pro
    litellm_params:
      model: azure/o1-pro
      api_key: os.environ/AZURE_RESPONSES_OPENAI_API_KEY
      api_base: https://litellm8397336933.openai.azure.com/
      api_version: 2023-03-15-preview
```

Start your LiteLLM proxy:
```bash
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```

Then use the OpenAI SDK pointed to your proxy:

#### Non-streaming
```python showLineNumbers
from openai import OpenAI

# Initialize client with your proxy URL
client = OpenAI(
    base_url="http://localhost:4000",  # Your proxy URL
    api_key="your-api-key"  # Your proxy API key
)

# Non-streaming response
response = client.responses.create(
    model="o1-pro",
    input="Tell me a three sentence bedtime story about a unicorn."
)

print(response)
```

#### Streaming
```python showLineNumbers
from openai import OpenAI

# Initialize client with your proxy URL
client = OpenAI(
    base_url="http://localhost:4000",  # Your proxy URL
    api_key="your-api-key"  # Your proxy API key
)

# Streaming response
response = client.responses.create(
    model="o1-pro",
    input="Tell me a three sentence bedtime story about a unicorn.",
    stream=True
)

for event in response:
    print(event)
```

</TabItem>
</Tabs>
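Because the proxy follows the OpenAI `/v1/responses` spec, the same endpoint can also be exercised without the SDK. A minimal sketch with plain `requests` — the proxy URL and key are placeholders:

```python
import requests

resp = requests.post(
    "http://localhost:4000/v1/responses",  # LiteLLM proxy (placeholder URL)
    headers={"Authorization": "Bearer your-api-key"},
    json={
        "model": "o1-pro",  # model_name from the config.yaml above
        "input": "Tell me a three sentence bedtime story about a unicorn.",
    },
)
print(resp.json())
```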
## Advanced
@ -24,7 +24,7 @@ LiteLLM provides a BETA endpoint in the spec of [OpenAI's `/responses` API](http
<TabItem value="litellm-sdk" label="LiteLLM SDK">

#### Non-streaming
```python
```python showLineNumbers
import litellm

# Non-streaming response

@ -38,7 +38,7 @@ print(response)
```

#### Streaming
```python
```python showLineNumbers
import litellm

# Streaming response

@ -56,7 +56,7 @@ for event in response:
<TabItem value="proxy" label="OpenAI SDK with LiteLLM Proxy">

First, add this to your litellm proxy config.yaml:
```yaml
```yaml showLineNumbers
model_list:
  - model_name: o1-pro
    litellm_params:

@ -74,7 +74,7 @@ litellm --config /path/to/config.yaml
Then use the OpenAI SDK pointed to your proxy:

#### Non-streaming
```python
```python showLineNumbers
from openai import OpenAI

# Initialize client with your proxy URL

@ -93,7 +93,7 @@ print(response)
```

#### Streaming
```python
```python showLineNumbers
from openai import OpenAI

# Initialize client with your proxy URL

@ -115,3 +115,11 @@ for event in response:

</TabItem>
</Tabs>

## **Supported Providers**

| Provider | Link to Usage |
|-------------|--------------------|
| OpenAI| [Usage](#usage) |
| Azure OpenAI| [Usage](../docs/providers/azure#responses-api) |
146  docs/my-website/docs/tutorials/openai_codex.md — Normal file

@ -0,0 +1,146 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Using LiteLLM with OpenAI Codex

This guide walks you through connecting OpenAI Codex to LiteLLM. Using LiteLLM with Codex allows teams to:
- Access 100+ LLMs through the Codex interface
- Use powerful models like Gemini through a familiar interface
- Track spend and usage with LiteLLM's built-in analytics
- Control model access with virtual keys

<Image img={require('../../img/litellm_codex.gif')} />

## Quickstart

:::info

Requires LiteLLM v1.66.3.dev5 and higher

:::

Make sure to set up LiteLLM with the [LiteLLM Getting Started Guide](../proxy/docker_quick_start.md).

## 1. Install OpenAI Codex

Install the OpenAI Codex CLI tool globally using npm:

<Tabs>
<TabItem value="npm" label="npm">

```bash showLineNumbers
npm i -g @openai/codex
```

</TabItem>
<TabItem value="yarn" label="yarn">

```bash showLineNumbers
yarn global add @openai/codex
```

</TabItem>
</Tabs>

## 2. Start LiteLLM Proxy

<Tabs>
<TabItem value="docker" label="Docker">

```bash showLineNumbers
docker run \
    -v $(pwd)/litellm_config.yaml:/app/config.yaml \
    -p 4000:4000 \
    ghcr.io/berriai/litellm:main-latest \
    --config /app/config.yaml
```

</TabItem>
<TabItem value="pip" label="LiteLLM CLI">

```bash showLineNumbers
litellm --config /path/to/config.yaml
```

</TabItem>
</Tabs>

LiteLLM should now be running on [http://localhost:4000](http://localhost:4000)
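Before wiring up Codex, it can help to confirm the proxy is answering. A quick sanity check (not part of the tutorial file itself; assumes the `sk-1234` proxy key used later in this guide):

```python
import requests

# List the models the proxy currently exposes (OpenAI-compatible endpoint)
resp = requests.get(
    "http://localhost:4000/v1/models",
    headers={"Authorization": "Bearer sk-1234"},
)
print(resp.json())
```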
## 3. Configure LiteLLM for Model Routing

Ensure your LiteLLM Proxy is properly configured to route to your desired models. Create a `litellm_config.yaml` file with the following content:

```yaml showLineNumbers
model_list:
  - model_name: o3-mini
    litellm_params:
      model: openai/o3-mini
      api_key: os.environ/OPENAI_API_KEY
  - model_name: claude-3-7-sonnet-latest
    litellm_params:
      model: anthropic/claude-3-7-sonnet-latest
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: gemini-2.0-flash
    litellm_params:
      model: gemini/gemini-2.0-flash
      api_key: os.environ/GEMINI_API_KEY

litellm_settings:
  drop_params: true
```

This configuration enables routing to specific OpenAI, Anthropic, and Gemini models with explicit names.

## 4. Configure Codex to Use LiteLLM Proxy

Set the required environment variables to point Codex to your LiteLLM Proxy:

```bash
# Point to your LiteLLM Proxy server
export OPENAI_BASE_URL=http://0.0.0.0:4000

# Use your LiteLLM API key (if you've set up authentication)
export OPENAI_API_KEY="sk-1234"
```
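With those variables exported, any OpenAI-compatible client picks up the proxy, which makes it easy to verify routing before launching Codex. A hedged sketch using the OpenAI SDK and the `gemini-2.0-flash` name from the config above:

```python
from openai import OpenAI

# Reads OPENAI_BASE_URL / OPENAI_API_KEY from the environment variables set above
client = OpenAI()

resp = client.chat.completions.create(
    model="gemini-2.0-flash",
    messages=[{"role": "user", "content": "Reply with the single word: ok"}],
)
print(resp.choices[0].message.content)
```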
## 5. Run Codex with Gemini

With everything configured, you can now run Codex with Gemini:

```bash showLineNumbers
codex --model gemini-2.0-flash --full-auto
```

<Image img={require('../../img/litellm_codex.gif')} />

The `--full-auto` flag allows Codex to automatically generate code without additional prompting.

## 6. Advanced Options

### Using Different Models

You can use any model configured in your LiteLLM proxy:

```bash
# Use Claude models
codex --model claude-3-7-sonnet-latest

# Use Google AI Studio Gemini models
codex --model gemini/gemini-2.0-flash
```

## Troubleshooting

- If you encounter connection issues, ensure your LiteLLM Proxy is running and accessible at the specified URL
- Verify your LiteLLM API key is valid if you're using authentication
- Check that your model routing configuration is correct
- For model-specific errors, ensure the model is properly configured in your LiteLLM setup

## Additional Resources

- [LiteLLM Docker Quick Start Guide](../proxy/docker_quick_start.md)
- [OpenAI Codex GitHub Repository](https://github.com/openai/codex)
- [LiteLLM Virtual Keys and Authentication](../proxy/virtual_keys.md)

BIN  docs/my-website/img/litellm_codex.gif — Normal file
Binary file not shown. After Width: | Height: | Size: 12 MiB
@ -443,6 +443,7 @@ const sidebars = {
          label: "Tutorials",
          items: [
            "tutorials/openweb_ui",
            "tutorials/openai_codex",
            "tutorials/msft_sso",
            "tutorials/prompt_caching",
            "tutorials/tag_management",
@ -0,0 +1,3 @@
-- AlterTable
ALTER TABLE "LiteLLM_VerificationToken" ADD COLUMN "allowed_routes" TEXT[] DEFAULT ARRAY[]::TEXT[];

@ -169,6 +169,7 @@ model LiteLLM_VerificationToken {
  budget_duration         String?
  budget_reset_at         DateTime?
  allowed_cache_controls  String[]  @default([])
  allowed_routes          String[]  @default([])
  model_spend             Json      @default("{}")
  model_max_budget        Json      @default("{}")
  budget_id               String?
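The new `allowed_routes` column backs per-key route restrictions. A hedged sketch of how a restricted virtual key might be issued through the proxy's `/key/generate` endpoint — the `allowed_routes` request field is assumed to mirror the new column and is not shown in this diff:

```python
import requests

resp = requests.post(
    "http://localhost:4000/key/generate",
    headers={"Authorization": "Bearer sk-1234"},  # admin/master key (placeholder)
    json={
        # Hypothetical: restrict this virtual key to chat completions only
        "allowed_routes": ["/v1/chat/completions"],
        "models": ["gemini-2.0-flash"],
    },
)
print(resp.json())
```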
@ -1,6 +1,7 @@
import glob
import os
import random
import re
import subprocess
import time
from pathlib import Path

@ -82,6 +83,26 @@ class ProxyExtrasDBManager:
        logger.info(f"Found {len(migration_paths)} migrations at {migrations_dir}")
        return [Path(p).parent.name for p in migration_paths]

    @staticmethod
    def _roll_back_migration(migration_name: str):
        """Mark a specific migration as rolled back"""
        subprocess.run(
            ["prisma", "migrate", "resolve", "--rolled-back", migration_name],
            timeout=60,
            check=True,
            capture_output=True,
        )

    @staticmethod
    def _resolve_specific_migration(migration_name: str):
        """Mark a specific migration as applied"""
        subprocess.run(
            ["prisma", "migrate", "resolve", "--applied", migration_name],
            timeout=60,
            check=True,
            capture_output=True,
        )

    @staticmethod
    def _resolve_all_migrations(migrations_dir: str):
        """Mark all existing migrations as applied"""

@ -141,7 +162,34 @@ class ProxyExtrasDBManager:
                return True
            except subprocess.CalledProcessError as e:
                logger.info(f"prisma db error: {e.stderr}, e: {e.stdout}")
                if (
                if "P3009" in e.stderr:
                    # Extract the failed migration name from the error message
                    migration_match = re.search(
                        r"`(\d+_.*)` migration", e.stderr
                    )
                    if migration_match:
                        failed_migration = migration_match.group(1)
                        logger.info(
                            f"Found failed migration: {failed_migration}, marking as rolled back"
                        )
                        # Mark the failed migration as rolled back
                        subprocess.run(
                            [
                                "prisma",
                                "migrate",
                                "resolve",
                                "--rolled-back",
                                failed_migration,
                            ],
                            timeout=60,
                            check=True,
                            capture_output=True,
                            text=True,
                        )
                        logger.info(
                            f"✅ Migration {failed_migration} marked as rolled back... retrying"
                        )
                elif (
                    "P3005" in e.stderr
                    and "database schema is not empty" in e.stderr
                ):

@ -155,6 +203,29 @@ class ProxyExtrasDBManager:
                    ProxyExtrasDBManager._resolve_all_migrations(migrations_dir)
                    logger.info("✅ All migrations resolved.")
                    return True
                elif (
                    "P3018" in e.stderr
                ):  # PostgreSQL error code for duplicate column
                    logger.info(
                        "Migration already exists, resolving specific migration"
                    )
                    # Extract the migration name from the error message
                    migration_match = re.search(
                        r"Migration name: (\d+_.*)", e.stderr
                    )
                    if migration_match:
                        migration_name = migration_match.group(1)
                        logger.info(f"Rolling back migration {migration_name}")
                        ProxyExtrasDBManager._roll_back_migration(
                            migration_name
                        )
                        logger.info(
                            f"Resolving migration {migration_name} that failed due to existing columns"
                        )
                        ProxyExtrasDBManager._resolve_specific_migration(
                            migration_name
                        )
                        logger.info("✅ Migration resolved.")
            else:
                # Use prisma db push with increased timeout
                subprocess.run(
@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm-proxy-extras"
version = "0.1.9"
version = "0.1.11"
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
authors = ["BerriAI"]
readme = "README.md"

@ -22,7 +22,7 @@ requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "0.1.9"
version = "0.1.11"
version_files = [
    "pyproject.toml:version",
    "../requirements.txt:litellm-proxy-extras==",
@ -128,19 +128,19 @@ prometheus_initialize_budget_metrics: Optional[bool] = False
|
|||
require_auth_for_metrics_endpoint: Optional[bool] = False
|
||||
argilla_batch_size: Optional[int] = None
|
||||
datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload
|
||||
gcs_pub_sub_use_v1: Optional[
|
||||
bool
|
||||
] = False # if you want to use v1 gcs pubsub logged payload
|
||||
gcs_pub_sub_use_v1: Optional[bool] = (
|
||||
False # if you want to use v1 gcs pubsub logged payload
|
||||
)
|
||||
argilla_transformation_object: Optional[Dict[str, Any]] = None
|
||||
_async_input_callback: List[
|
||||
Union[str, Callable, CustomLogger]
|
||||
] = [] # internal variable - async custom callbacks are routed here.
|
||||
_async_success_callback: List[
|
||||
Union[str, Callable, CustomLogger]
|
||||
] = [] # internal variable - async custom callbacks are routed here.
|
||||
_async_failure_callback: List[
|
||||
Union[str, Callable, CustomLogger]
|
||||
] = [] # internal variable - async custom callbacks are routed here.
|
||||
_async_input_callback: List[Union[str, Callable, CustomLogger]] = (
|
||||
[]
|
||||
) # internal variable - async custom callbacks are routed here.
|
||||
_async_success_callback: List[Union[str, Callable, CustomLogger]] = (
|
||||
[]
|
||||
) # internal variable - async custom callbacks are routed here.
|
||||
_async_failure_callback: List[Union[str, Callable, CustomLogger]] = (
|
||||
[]
|
||||
) # internal variable - async custom callbacks are routed here.
|
||||
pre_call_rules: List[Callable] = []
|
||||
post_call_rules: List[Callable] = []
|
||||
turn_off_message_logging: Optional[bool] = False
|
||||
|
@ -148,18 +148,18 @@ log_raw_request_response: bool = False
|
|||
redact_messages_in_exceptions: Optional[bool] = False
|
||||
redact_user_api_key_info: Optional[bool] = False
|
||||
filter_invalid_headers: Optional[bool] = False
|
||||
add_user_information_to_llm_headers: Optional[
|
||||
bool
|
||||
] = None # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers
|
||||
add_user_information_to_llm_headers: Optional[bool] = (
|
||||
None # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers
|
||||
)
|
||||
store_audit_logs = False # Enterprise feature, allow users to see audit logs
|
||||
### end of callbacks #############
|
||||
|
||||
email: Optional[
|
||||
str
|
||||
] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
|
||||
token: Optional[
|
||||
str
|
||||
] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
|
||||
email: Optional[str] = (
|
||||
None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
|
||||
)
|
||||
token: Optional[str] = (
|
||||
None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
|
||||
)
|
||||
telemetry = True
|
||||
max_tokens: int = DEFAULT_MAX_TOKENS # OpenAI Defaults
|
||||
drop_params = bool(os.getenv("LITELLM_DROP_PARAMS", False))
|
||||
|
@ -235,20 +235,24 @@ enable_loadbalancing_on_batch_endpoints: Optional[bool] = None
|
|||
enable_caching_on_provider_specific_optional_params: bool = (
|
||||
False # feature-flag for caching on optional params - e.g. 'top_k'
|
||||
)
|
||||
caching: bool = False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
|
||||
caching_with_models: bool = False # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
|
||||
cache: Optional[
|
||||
Cache
|
||||
] = None # cache object <- use this - https://docs.litellm.ai/docs/caching
|
||||
caching: bool = (
|
||||
False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
|
||||
)
|
||||
caching_with_models: bool = (
|
||||
False # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
|
||||
)
|
||||
cache: Optional[Cache] = (
|
||||
None # cache object <- use this - https://docs.litellm.ai/docs/caching
|
||||
)
|
||||
default_in_memory_ttl: Optional[float] = None
|
||||
default_redis_ttl: Optional[float] = None
|
||||
default_redis_batch_cache_expiry: Optional[float] = None
|
||||
model_alias_map: Dict[str, str] = {}
|
||||
model_group_alias_map: Dict[str, str] = {}
|
||||
max_budget: float = 0.0 # set the max budget across all providers
|
||||
budget_duration: Optional[
|
||||
str
|
||||
] = None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
|
||||
budget_duration: Optional[str] = (
|
||||
None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
|
||||
)
|
||||
default_soft_budget: float = (
|
||||
DEFAULT_SOFT_BUDGET # by default all litellm proxy keys have a soft budget of 50.0
|
||||
)
|
||||
|
@ -257,11 +261,15 @@ forward_traceparent_to_llm_provider: bool = False
|
|||
|
||||
_current_cost = 0.0 # private variable, used if max budget is set
|
||||
error_logs: Dict = {}
|
||||
add_function_to_prompt: bool = False # if function calling not supported by api, append function call details to system prompt
|
||||
add_function_to_prompt: bool = (
|
||||
False # if function calling not supported by api, append function call details to system prompt
|
||||
)
|
||||
client_session: Optional[httpx.Client] = None
|
||||
aclient_session: Optional[httpx.AsyncClient] = None
|
||||
model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks'
|
||||
model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
|
||||
model_cost_map_url: str = (
|
||||
"https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
|
||||
)
|
||||
suppress_debug_info = False
|
||||
dynamodb_table_name: Optional[str] = None
|
||||
s3_callback_params: Optional[Dict] = None
|
||||
|
@ -284,7 +292,9 @@ disable_end_user_cost_tracking_prometheus_only: Optional[bool] = None
|
|||
custom_prometheus_metadata_labels: List[str] = []
|
||||
#### REQUEST PRIORITIZATION ####
|
||||
priority_reservation: Optional[Dict[str, float]] = None
|
||||
force_ipv4: bool = False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
|
||||
force_ipv4: bool = (
|
||||
False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
|
||||
)
|
||||
module_level_aclient = AsyncHTTPHandler(
|
||||
timeout=request_timeout, client_alias="module level aclient"
|
||||
)
|
||||
|
@ -298,13 +308,13 @@ fallbacks: Optional[List] = None
|
|||
context_window_fallbacks: Optional[List] = None
|
||||
content_policy_fallbacks: Optional[List] = None
|
||||
allowed_fails: int = 3
|
||||
num_retries_per_request: Optional[
|
||||
int
|
||||
] = None # for the request overall (incl. fallbacks + model retries)
|
||||
num_retries_per_request: Optional[int] = (
|
||||
None # for the request overall (incl. fallbacks + model retries)
|
||||
)
|
||||
####### SECRET MANAGERS #####################
|
||||
secret_manager_client: Optional[
|
||||
Any
|
||||
] = None # list of instantiated key management clients - e.g. azure kv, infisical, etc.
|
||||
secret_manager_client: Optional[Any] = (
|
||||
None # list of instantiated key management clients - e.g. azure kv, infisical, etc.
|
||||
)
|
||||
_google_kms_resource_name: Optional[str] = None
|
||||
_key_management_system: Optional[KeyManagementSystem] = None
|
||||
_key_management_settings: KeyManagementSettings = KeyManagementSettings()
|
||||
|
@ -939,6 +949,7 @@ from .llms.voyage.embedding.transformation import VoyageEmbeddingConfig
|
|||
from .llms.azure_ai.chat.transformation import AzureAIStudioConfig
|
||||
from .llms.mistral.mistral_chat_transformation import MistralConfig
|
||||
from .llms.openai.responses.transformation import OpenAIResponsesAPIConfig
|
||||
from .llms.azure.responses.transformation import AzureOpenAIResponsesAPIConfig
|
||||
from .llms.openai.chat.o_series_transformation import (
|
||||
OpenAIOSeriesConfig as OpenAIO1Config, # maintain backwards compatibility
|
||||
OpenAIOSeriesConfig,
|
||||
|
@ -1055,10 +1066,10 @@ from .types.llms.custom_llm import CustomLLMItem
|
|||
from .types.utils import GenericStreamingChunk
|
||||
|
||||
custom_provider_map: List[CustomLLMItem] = []
|
||||
_custom_providers: List[
|
||||
str
|
||||
] = [] # internal helper util, used to track names of custom providers
|
||||
disable_hf_tokenizer_download: Optional[
|
||||
bool
|
||||
] = None # disable huggingface tokenizer download. Defaults to openai clk100
|
||||
_custom_providers: List[str] = (
|
||||
[]
|
||||
) # internal helper util, used to track names of custom providers
|
||||
disable_hf_tokenizer_download: Optional[bool] = (
|
||||
None # disable huggingface tokenizer download. Defaults to openai clk100
|
||||
)
|
||||
global_disable_no_log_param: bool = False
|
||||
|
|
|
@ -304,6 +304,11 @@ def create_assistants(
        "response_format": response_format,
    }

    # only send params that are not None
    create_assistant_data = {
        k: v for k, v in create_assistant_data.items() if v is not None
    }

    response: Optional[Union[Coroutine[Any, Any, Assistant], Assistant]] = None
    if custom_llm_provider == "openai":
        api_base = (
@ -21,6 +21,10 @@ DEFAULT_MAX_TOKENS = 256 # used when providers need a default
MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024  # 1MB = 1024KB
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000  # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.

DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET = 1024
DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET = 2048
DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET = 4096

########## Networking constants ##############################################################
_DEFAULT_TTL_FOR_HTTPX_CLIENTS = 3600  # 1 hour, re-use the same httpx client for 1 hour
@ -267,6 +267,7 @@ def generic_cost_per_token(
    ## CALCULATE OUTPUT COST
    text_tokens = usage.completion_tokens
    audio_tokens = 0
    reasoning_tokens = 0
    if usage.completion_tokens_details is not None:
        audio_tokens = (
            cast(

@ -282,7 +283,13 @@ def generic_cost_per_token(
            )
            or usage.completion_tokens  # default to completion tokens, if this field is not set
        )

        reasoning_tokens = (
            cast(
                Optional[int],
                getattr(usage.completion_tokens_details, "reasoning_tokens", 0),
            )
            or 0
        )
    ## TEXT COST
    completion_cost = float(text_tokens) * completion_base_cost

@ -290,6 +297,10 @@ def generic_cost_per_token(
        "output_cost_per_audio_token"
    )

    _output_cost_per_reasoning_token: Optional[float] = model_info.get(
        "output_cost_per_reasoning_token"
    )

    ## AUDIO COST
    if (
        _output_cost_per_audio_token is not None

@ -298,4 +309,12 @@ def generic_cost_per_token(
    ):
        completion_cost += float(audio_tokens) * _output_cost_per_audio_token

    ## REASONING COST
    if (
        _output_cost_per_reasoning_token is not None
        and reasoning_tokens
        and reasoning_tokens > 0
    ):
        completion_cost += float(reasoning_tokens) * _output_cost_per_reasoning_token

    return prompt_cost, completion_cost
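To make the new arithmetic concrete, a small standalone illustration of the output-cost formula above — the token counts and per-token rates are made up, not taken from this diff:

```python
# Mirrors the output-side logic of generic_cost_per_token:
# text tokens at the base rate, plus reasoning tokens at their own rate when one is configured.
text_tokens = 150
reasoning_tokens = 400

output_cost_per_token = 0.6e-6            # hypothetical base output rate ($/token)
output_cost_per_reasoning_token = 3.5e-6  # hypothetical reasoning rate ($/token)

completion_cost = text_tokens * output_cost_per_token
if output_cost_per_reasoning_token is not None and reasoning_tokens > 0:
    completion_cost += reasoning_tokens * output_cost_per_reasoning_token

print(f"completion cost: ${completion_cost:.6f}")  # $0.001490
```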
@ -7,6 +7,9 @@ import httpx
import litellm
from litellm.constants import (
    DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS,
    DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET,
    DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
    DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET,
    RESPONSE_FORMAT_TOOL_NAME,
)
from litellm.litellm_core_utils.core_helpers import map_finish_reason

@ -276,11 +279,20 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
        if reasoning_effort is None:
            return None
        elif reasoning_effort == "low":
            return AnthropicThinkingParam(type="enabled", budget_tokens=1024)
            return AnthropicThinkingParam(
                type="enabled",
                budget_tokens=DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
            )
        elif reasoning_effort == "medium":
            return AnthropicThinkingParam(type="enabled", budget_tokens=2048)
            return AnthropicThinkingParam(
                type="enabled",
                budget_tokens=DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET,
            )
        elif reasoning_effort == "high":
            return AnthropicThinkingParam(type="enabled", budget_tokens=4096)
            return AnthropicThinkingParam(
                type="enabled",
                budget_tokens=DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET,
            )
        else:
            raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}")
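In practice this mapping is exercised through the standard completion path. A hedged usage sketch (the model name and key handling are placeholders); per the constants added above, `reasoning_effort="medium"` should translate into an Anthropic `thinking` block with a 2048-token budget:

```python
import litellm

response = litellm.completion(
    model="anthropic/claude-3-7-sonnet-latest",
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
    reasoning_effort="medium",  # mapped to thinking={"type": "enabled", "budget_tokens": 2048}
)
print(response.choices[0].message.content)
```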
@ -288,6 +288,7 @@ class AzureAssistantsAPI(BaseAzureLLM):
            timeout=timeout,
            max_retries=max_retries,
            client=client,
            litellm_params=litellm_params,
        )

        thread_message: OpenAIMessage = openai_client.beta.threads.messages.create(  # type: ignore

@ -79,7 +79,7 @@ class AzureOpenAIO1Config(OpenAIOSeriesConfig):
        return True

    def is_o_series_model(self, model: str) -> bool:
        return "o1" in model or "o3" in model or "o_series/" in model
        return "o1" in model or "o3" in model or "o4" in model or "o_series/" in model

    def transform_request(
        self,
94  litellm/llms/azure/responses/transformation.py — Normal file

@ -0,0 +1,94 @@
from typing import TYPE_CHECKING, Any, Optional, cast

import httpx

import litellm
from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import *
from litellm.utils import _add_path_to_api_base

if TYPE_CHECKING:
    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj

    LiteLLMLoggingObj = _LiteLLMLoggingObj
else:
    LiteLLMLoggingObj = Any


class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig):
    def validate_environment(
        self,
        headers: dict,
        model: str,
        api_key: Optional[str] = None,
    ) -> dict:
        api_key = (
            api_key
            or litellm.api_key
            or litellm.azure_key
            or get_secret_str("AZURE_OPENAI_API_KEY")
            or get_secret_str("AZURE_API_KEY")
        )

        headers.update(
            {
                "Authorization": f"Bearer {api_key}",
            }
        )
        return headers

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """
        Constructs a complete URL for the API request.

        Args:
        - api_base: Base URL, e.g.,
            "https://litellm8397336933.openai.azure.com"
            OR
            "https://litellm8397336933.openai.azure.com/openai/responses?api-version=2024-05-01-preview"
        - model: Model name.
        - optional_params: Additional query parameters, including "api_version".
        - stream: If streaming is required (optional).

        Returns:
        - A complete URL string, e.g.,
            "https://litellm8397336933.openai.azure.com/openai/responses?api-version=2024-05-01-preview"
        """
        api_base = api_base or litellm.api_base or get_secret_str("AZURE_API_BASE")
        if api_base is None:
            raise ValueError(
                f"api_base is required for Azure AI Studio. Please set the api_base parameter. Passed `api_base={api_base}`"
            )
        original_url = httpx.URL(api_base)

        # Extract api_version or use default
        api_version = cast(Optional[str], litellm_params.get("api_version"))

        # Create a new dictionary with existing params
        query_params = dict(original_url.params)

        # Add api_version if needed
        if "api-version" not in query_params and api_version:
            query_params["api-version"] = api_version

        # Add the path to the base URL
        if "/openai/responses" not in api_base:
            new_url = _add_path_to_api_base(
                api_base=api_base, ending_path="/openai/responses"
            )
        else:
            new_url = api_base

        # Use the new query_params dictionary
        final_url = httpx.URL(new_url).copy_with(params=query_params)

        return str(final_url)
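A quick, illustrative check of what `get_complete_url` above is expected to produce — the Azure resource name and api-version are placeholders, and this is a sketch rather than a test from the PR:

```python
from litellm.llms.azure.responses.transformation import AzureOpenAIResponsesAPIConfig

config = AzureOpenAIResponsesAPIConfig()
url = config.get_complete_url(
    api_base="https://my-resource.openai.azure.com",
    api_key=None,
    model="o1-pro",
    optional_params={},
    litellm_params={"api_version": "2024-05-01-preview"},
    stream=False,
)
# Expected: https://my-resource.openai.azure.com/openai/responses?api-version=2024-05-01-preview
print(url)
```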
@ -73,7 +73,10 @@ class BaseResponsesAPIConfig(ABC):
    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """

@ -462,7 +462,7 @@ class BaseLLMHTTPHandler:
            )

        if fake_stream is True:
            model_response: (ModelResponse) = provider_config.transform_response(
            model_response: ModelResponse = provider_config.transform_response(
                model=model,
                raw_response=response,
                model_response=litellm.ModelResponse(),

@ -595,7 +595,7 @@ class BaseLLMHTTPHandler:
            )

        if fake_stream is True:
            model_response: (ModelResponse) = provider_config.transform_response(
            model_response: ModelResponse = provider_config.transform_response(
                model=model,
                raw_response=response,
                model_response=litellm.ModelResponse(),

@ -1055,9 +1055,16 @@ class BaseLLMHTTPHandler:
        if extra_headers:
            headers.update(extra_headers)

        # Check if streaming is requested
        stream = response_api_optional_request_params.get("stream", False)

        api_base = responses_api_provider_config.get_complete_url(
            api_base=litellm_params.api_base,
            api_key=litellm_params.api_key,
            model=model,
            optional_params=response_api_optional_request_params,
            litellm_params=dict(litellm_params),
            stream=stream,
        )

        data = responses_api_provider_config.transform_responses_api_request(

@ -1079,9 +1086,6 @@ class BaseLLMHTTPHandler:
            },
        )

        # Check if streaming is requested
        stream = response_api_optional_request_params.get("stream", False)

        try:
            if stream:
                # For streaming, use stream=True in the request

@ -1170,9 +1174,16 @@ class BaseLLMHTTPHandler:
        if extra_headers:
            headers.update(extra_headers)

        # Check if streaming is requested
        stream = response_api_optional_request_params.get("stream", False)

        api_base = responses_api_provider_config.get_complete_url(
            api_base=litellm_params.api_base,
            api_key=litellm_params.api_key,
            model=model,
            optional_params=response_api_optional_request_params,
            litellm_params=dict(litellm_params),
            stream=stream,
        )

        data = responses_api_provider_config.transform_responses_api_request(

@ -1193,8 +1204,6 @@ class BaseLLMHTTPHandler:
                "headers": headers,
            },
        )
        # Check if streaming is requested
        stream = response_api_optional_request_params.get("stream", False)

        try:
            if stream:
@ -7,6 +7,7 @@ from litellm.litellm_core_utils.prompt_templates.factory import (
)
from litellm.types.llms.openai import AllMessageValues
from litellm.types.llms.vertex_ai import ContentType, PartType
from litellm.utils import supports_reasoning

from ...vertex_ai.gemini.transformation import _gemini_convert_messages_with_history
from ...vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexGeminiConfig

@ -67,7 +68,7 @@ class GoogleAIStudioGeminiConfig(VertexGeminiConfig):
        return super().get_config()

    def get_supported_openai_params(self, model: str) -> List[str]:
        return [
        supported_params = [
            "temperature",
            "top_p",
            "max_tokens",

@ -83,6 +84,10 @@ class GoogleAIStudioGeminiConfig(VertexGeminiConfig):
            "frequency_penalty",
            "modalities",
        ]
        if supports_reasoning(model):
            supported_params.append("reasoning_effort")
            supported_params.append("thinking")
        return supported_params

    def map_openai_params(
        self,
@ -131,7 +131,10 @@ class OpenAIOSeriesConfig(OpenAIGPTConfig):

    def is_model_o_series_model(self, model: str) -> bool:
        if model in litellm.open_ai_chat_completion_models and (
            "o1" in model or "o3" in model
            "o1" in model
            or "o3" in model
            or "o4"
            in model  # [TODO] make this a more generic check (e.g. using `openai-o-series` as provider like gemini)
        ):
            return True
        return False
@ -110,7 +110,10 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """

@ -401,6 +401,7 @@ def construct_target_url(
    Constructed Url:
    POST https://LOCATION-aiplatform.googleapis.com/{version}/projects/PROJECT_ID/locations/LOCATION/cachedContents
    """

    new_base_url = httpx.URL(base_url)
    if "locations" in requested_route:  # contains the target project id + location
        if vertex_project and vertex_location:
@ -24,6 +24,11 @@ import litellm
|
|||
import litellm.litellm_core_utils
|
||||
import litellm.litellm_core_utils.litellm_logging
|
||||
from litellm import verbose_logger
|
||||
from litellm.constants import (
|
||||
DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET,
|
||||
DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
|
||||
DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET,
|
||||
)
|
||||
from litellm.litellm_core_utils.core_helpers import map_finish_reason
|
||||
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
|
||||
from litellm.llms.custom_httpx.http_handler import (
|
||||
|
@ -31,6 +36,7 @@ from litellm.llms.custom_httpx.http_handler import (
|
|||
HTTPHandler,
|
||||
get_async_httpx_client,
|
||||
)
|
||||
from litellm.types.llms.anthropic import AnthropicThinkingParam
|
||||
from litellm.types.llms.openai import (
|
||||
AllMessageValues,
|
||||
ChatCompletionResponseMessage,
|
||||
|
@ -45,6 +51,7 @@ from litellm.types.llms.vertex_ai import (
|
|||
ContentType,
|
||||
FunctionCallingConfig,
|
||||
FunctionDeclaration,
|
||||
GeminiThinkingConfig,
|
||||
GenerateContentResponseBody,
|
||||
HttpxPartType,
|
||||
LogprobsResult,
|
||||
|
@ -59,7 +66,7 @@ from litellm.types.utils import (
|
|||
TopLogprob,
|
||||
Usage,
|
||||
)
|
||||
from litellm.utils import CustomStreamWrapper, ModelResponse
|
||||
from litellm.utils import CustomStreamWrapper, ModelResponse, supports_reasoning
|
||||
|
||||
from ....utils import _remove_additional_properties, _remove_strict_from_schema
|
||||
from ..common_utils import VertexAIError, _build_vertex_schema
|
||||
|
@ -190,7 +197,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
|
|||
return super().get_config()
|
||||
|
||||
def get_supported_openai_params(self, model: str) -> List[str]:
|
||||
return [
|
||||
supported_params = [
|
||||
"temperature",
|
||||
"top_p",
|
||||
"max_tokens",
|
||||
|
@ -210,6 +217,10 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
|
|||
"top_logprobs",
|
||||
"modalities",
|
||||
]
|
||||
if supports_reasoning(model):
|
||||
supported_params.append("reasoning_effort")
|
||||
supported_params.append("thinking")
|
||||
return supported_params
|
||||
|
||||
def map_tool_choice_values(
|
||||
self, model: str, tool_choice: Union[str, dict]
|
||||
|
@ -313,10 +324,14 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
|
|||
if isinstance(old_schema, list):
|
||||
for item in old_schema:
|
||||
if isinstance(item, dict):
|
||||
item = _build_vertex_schema(parameters=item, add_property_ordering=True)
|
||||
item = _build_vertex_schema(
|
||||
parameters=item, add_property_ordering=True
|
||||
)
|
||||
|
||||
elif isinstance(old_schema, dict):
|
||||
old_schema = _build_vertex_schema(parameters=old_schema, add_property_ordering=True)
|
||||
old_schema = _build_vertex_schema(
|
||||
parameters=old_schema, add_property_ordering=True
|
||||
)
|
||||
return old_schema
|
||||
|
||||
def apply_response_schema_transformation(self, value: dict, optional_params: dict):
|
||||
|
@ -343,6 +358,43 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
                value=optional_params["response_schema"]
            )

    @staticmethod
    def _map_reasoning_effort_to_thinking_budget(
        reasoning_effort: str,
    ) -> GeminiThinkingConfig:
        if reasoning_effort == "low":
            return {
                "thinkingBudget": DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
                "includeThoughts": True,
            }
        elif reasoning_effort == "medium":
            return {
                "thinkingBudget": DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET,
                "includeThoughts": True,
            }
        elif reasoning_effort == "high":
            return {
                "thinkingBudget": DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET,
                "includeThoughts": True,
            }
        else:
            raise ValueError(f"Invalid reasoning effort: {reasoning_effort}")

    @staticmethod
    def _map_thinking_param(
        thinking_param: AnthropicThinkingParam,
    ) -> GeminiThinkingConfig:
        thinking_enabled = thinking_param.get("type") == "enabled"
        thinking_budget = thinking_param.get("budget_tokens")

        params: GeminiThinkingConfig = {}
        if thinking_enabled:
            params["includeThoughts"] = True
        if thinking_budget:
            params["thinkingBudget"] = thinking_budget

        return params

    def map_openai_params(
        self,
        non_default_params: Dict,

@ -399,6 +451,16 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
                optional_params["tool_choice"] = _tool_choice_value
            elif param == "seed":
                optional_params["seed"] = value
            elif param == "reasoning_effort" and isinstance(value, str):
                optional_params[
                    "thinkingConfig"
                ] = VertexGeminiConfig._map_reasoning_effort_to_thinking_budget(value)
            elif param == "thinking":
                optional_params[
                    "thinkingConfig"
                ] = VertexGeminiConfig._map_thinking_param(
                    cast(AnthropicThinkingParam, value)
                )
            elif param == "modalities" and isinstance(value, list):
                response_modalities = []
                for modality in value:
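Same idea on the Gemini side — a hedged usage sketch (the model name is the preview model added elsewhere in this commit; key handling is omitted). Per the mapping above, `reasoning_effort="low"` should become `thinkingConfig={"thinkingBudget": 1024, "includeThoughts": True}` in the generation config:

```python
import litellm

response = litellm.completion(
    model="gemini/gemini-2.5-flash-preview-04-17",
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
    reasoning_effort="low",  # -> thinkingConfig with a 1024-token budget (see constants above)
)
print(response.choices[0].message.content)
```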
@ -514,19 +576,27 @@

    def get_assistant_content_message(
        self, parts: List[HttpxPartType]
    ) -> Optional[str]:
        _content_str = ""
    ) -> Tuple[Optional[str], Optional[str]]:
        content_str: Optional[str] = None
        reasoning_content_str: Optional[str] = None
        for part in parts:
            _content_str = ""
            if "text" in part:
                _content_str += part["text"]
            elif "inlineData" in part:  # base64 encoded image
                _content_str += "data:{};base64,{}".format(
                    part["inlineData"]["mimeType"], part["inlineData"]["data"]
                )
            if part.get("thought") is True:
                if reasoning_content_str is None:
                    reasoning_content_str = ""
                reasoning_content_str += _content_str
            else:
                if content_str is None:
                    content_str = ""
                content_str += _content_str

        if _content_str:
            return _content_str
        return None
        return content_str, reasoning_content_str

    def _transform_parts(
        self,
@ -677,6 +747,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
        audio_tokens: Optional[int] = None
        text_tokens: Optional[int] = None
        prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None
        reasoning_tokens: Optional[int] = None
        if "cachedContentTokenCount" in completion_response["usageMetadata"]:
            cached_tokens = completion_response["usageMetadata"][
                "cachedContentTokenCount"

@ -687,7 +758,10 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
                    audio_tokens = detail["tokenCount"]
                elif detail["modality"] == "TEXT":
                    text_tokens = detail["tokenCount"]

        if "thoughtsTokenCount" in completion_response["usageMetadata"]:
            reasoning_tokens = completion_response["usageMetadata"][
                "thoughtsTokenCount"
            ]
        prompt_tokens_details = PromptTokensDetailsWrapper(
            cached_tokens=cached_tokens,
            audio_tokens=audio_tokens,

@ -703,6 +777,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
            ),
            total_tokens=completion_response["usageMetadata"].get("totalTokenCount", 0),
            prompt_tokens_details=prompt_tokens_details,
            reasoning_tokens=reasoning_tokens,
        )

        return usage

@ -731,11 +806,16 @@
                citation_metadata.append(candidate["citationMetadata"])

            if "parts" in candidate["content"]:
                chat_completion_message[
                    "content"
                ] = VertexGeminiConfig().get_assistant_content_message(
                (
                    content,
                    reasoning_content,
                ) = VertexGeminiConfig().get_assistant_content_message(
                    parts=candidate["content"]["parts"]
                )
                if content is not None:
                    chat_completion_message["content"] = content
                if reasoning_content is not None:
                    chat_completion_message["reasoning_content"] = reasoning_content

            functions, tools = self._transform_parts(
                parts=candidate["content"]["parts"],
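Taken together, Gemini "thought" parts now surface on the OpenAI-style response object. A hedged sketch of what a caller can expect to read back (field names follow the diff above; whether thoughts are actually returned depends on the model and provider):

```python
import litellm

response = litellm.completion(
    model="gemini/gemini-2.5-flash-preview-04-17",
    messages=[{"role": "user", "content": "Plan a 3-step experiment."}],
    reasoning_effort="medium",
)

message = response.choices[0].message
print(message.content)                               # normal assistant text
print(getattr(message, "reasoning_content", None))   # thought parts, when returned

# Reasoning token count plumbed through from usageMetadata.thoughtsTokenCount
print(response.usage.completion_tokens_details)
```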
|
@ -1471,6 +1471,64 @@
|
|||
"litellm_provider": "openai",
|
||||
"supported_endpoints": ["/v1/audio/speech"]
|
||||
},
|
||||
"azure/gpt-4.1": {
|
||||
"max_tokens": 32768,
|
||||
"max_input_tokens": 1047576,
|
||||
"max_output_tokens": 32768,
|
||||
"input_cost_per_token": 2e-6,
|
||||
"output_cost_per_token": 8e-6,
|
||||
"input_cost_per_token_batches": 1e-6,
|
||||
"output_cost_per_token_batches": 4e-6,
|
||||
"cache_read_input_token_cost": 0.5e-6,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat",
|
||||
"supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"],
|
||||
"supported_modalities": ["text", "image"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_vision": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_system_messages": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_native_streaming": true,
|
||||
"supports_web_search": true,
|
||||
"search_context_cost_per_query": {
|
||||
"search_context_size_low": 30e-3,
|
||||
"search_context_size_medium": 35e-3,
|
||||
"search_context_size_high": 50e-3
|
||||
}
|
||||
},
|
||||
"azure/gpt-4.1-2025-04-14": {
|
||||
"max_tokens": 32768,
|
||||
"max_input_tokens": 1047576,
|
||||
"max_output_tokens": 32768,
|
||||
"input_cost_per_token": 2e-6,
|
||||
"output_cost_per_token": 8e-6,
|
||||
"input_cost_per_token_batches": 1e-6,
|
||||
"output_cost_per_token_batches": 4e-6,
|
||||
"cache_read_input_token_cost": 0.5e-6,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat",
|
||||
"supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"],
|
||||
"supported_modalities": ["text", "image"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_vision": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_system_messages": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_native_streaming": true,
|
||||
"supports_web_search": true,
|
||||
"search_context_cost_per_query": {
|
||||
"search_context_size_low": 30e-3,
|
||||
"search_context_size_medium": 35e-3,
|
||||
"search_context_size_high": 50e-3
|
||||
}
|
||||
},
|
||||
"azure/gpt-4o-mini-realtime-preview-2024-12-17": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 128000,
|
||||
|
@ -1647,6 +1705,23 @@
|
|||
"supports_system_messages": true,
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"azure/o4-mini-2025-04-16": {
|
||||
"max_tokens": 100000,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 100000,
|
||||
"input_cost_per_token": 1.1e-6,
|
||||
"output_cost_per_token": 4.4e-6,
|
||||
"cache_read_input_token_cost": 2.75e-7,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": false,
|
||||
"supports_vision": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"azure/o3-mini-2025-01-31": {
|
||||
"max_tokens": 100000,
|
||||
"max_input_tokens": 200000,
|
||||
|
@ -5093,6 +5168,64 @@
|
|||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"gemini/gemini-2.5-flash-preview-04-17": {
|
||||
"max_tokens": 65536,
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 65536,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_videos_per_prompt": 10,
|
||||
"max_video_length": 1,
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_audio_token": 1e-6,
|
||||
"input_cost_per_token": 0.15e-6,
|
||||
"output_cost_per_token": 0.6e-6,
|
||||
"output_cost_per_reasoning_token": 3.5e-6,
|
||||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"rpm": 10,
|
||||
"tpm": 250000,
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": false,
|
||||
"supports_tool_choice": true,
|
||||
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview"
|
||||
},
|
||||
"gemini-2.5-flash-preview-04-17": {
|
||||
"max_tokens": 65536,
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 65536,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_videos_per_prompt": 10,
|
||||
"max_video_length": 1,
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_audio_token": 1e-6,
|
||||
"input_cost_per_token": 0.15e-6,
|
||||
"output_cost_per_token": 0.6e-6,
|
||||
"output_cost_per_reasoning_token": 3.5e-6,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_reasoning": true,
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": false,
|
||||
"supports_tool_choice": true,
|
||||
"supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"],
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview"
|
||||
},
|
||||
"gemini-2.0-flash": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1048576,
|
||||
|
|
Several file diffs suppressed because one or more lines are too long (minified Next.js UI build chunks added, removed, or updated with new hashed module IDs).
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{20169:function(e,n,t){Promise.resolve().then(t.t.bind(t,12846,23)),Promise.resolve().then(t.t.bind(t,19107,23)),Promise.resolve().then(t.t.bind(t,61060,23)),Promise.resolve().then(t.t.bind(t,4707,23)),Promise.resolve().then(t.t.bind(t,80,23)),Promise.resolve().then(t.t.bind(t,36423,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,117],function(){return n(54278),n(20169)}),_N_E=e.O()}]);
|
||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{10264:function(e,n,t){Promise.resolve().then(t.t.bind(t,12846,23)),Promise.resolve().then(t.t.bind(t,19107,23)),Promise.resolve().then(t.t.bind(t,61060,23)),Promise.resolve().then(t.t.bind(t,4707,23)),Promise.resolve().then(t.t.bind(t,80,23)),Promise.resolve().then(t.t.bind(t,36423,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,117],function(){return n(54278),n(10264)}),_N_E=e.O()}]);
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-87ec698bfca6820e.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/a33d9ae33620c3b8.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[37185,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-1cbed529ecb084e0.js\",\"261\",\"static/chunks/261-57d48f76eec1e568.js\",\"899\",\"static/chunks/899-9af4feaf6f21839c.js\",\"817\",\"static/chunks/817-9919df897a7c474b.js\",\"250\",\"static/chunks/250-f21e8c1de1717077.js\",\"699\",\"static/chunks/699-b82671c2a3a100c7.js\",\"931\",\"static/chunks/app/page-46db98575bd064ba.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"CRVaojwLOuPGzfRFX3bIE\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/a33d9ae33620c3b8.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-87ec698bfca6820e.js" async=""></script><script src="/ui/_next/static/chunks/main-app-4f7318ae681a6d94.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/be22292d8ac48764.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[94226,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-59f99bfbf676f282.js\",\"261\",\"static/chunks/261-57d48f76eec1e568.js\",\"899\",\"static/chunks/899-9af4feaf6f21839c.js\",\"875\",\"static/chunks/875-85b7d9e9afef48d5.js\",\"250\",\"static/chunks/250-7b7f46d48724f856.js\",\"699\",\"static/chunks/699-99a8a36b70ac90c1.js\",\"931\",\"static/chunks/app/page-1e545df8fad65452.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"u3E41CAVE1NTuNPVcBvVa\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/be22292d8ac48764.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
|||
2:I[19107,[],"ClientPageRoot"]
|
||||
3:I[37185,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","817","static/chunks/817-9919df897a7c474b.js","250","static/chunks/250-f21e8c1de1717077.js","699","static/chunks/699-b82671c2a3a100c7.js","931","static/chunks/app/page-46db98575bd064ba.js"],"default",1]
|
||||
3:I[94226,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-59f99bfbf676f282.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","875","static/chunks/875-85b7d9e9afef48d5.js","250","static/chunks/250-7b7f46d48724f856.js","699","static/chunks/699-99a8a36b70ac90c1.js","931","static/chunks/app/page-1e545df8fad65452.js"],"default",1]
|
||||
4:I[4707,[],""]
|
||||
5:I[36423,[],""]
|
||||
0:["CRVaojwLOuPGzfRFX3bIE",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/a33d9ae33620c3b8.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
||||
0:["u3E41CAVE1NTuNPVcBvVa",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/be22292d8ac48764.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
2:I[19107,[],"ClientPageRoot"]
|
||||
3:I[52829,["42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-f21e8c1de1717077.js","699","static/chunks/699-b82671c2a3a100c7.js","418","static/chunks/app/model_hub/page-068a441595bd0fc3.js"],"default",1]
|
||||
3:I[52829,["42","static/chunks/42-59f99bfbf676f282.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-7b7f46d48724f856.js","699","static/chunks/699-99a8a36b70ac90c1.js","418","static/chunks/app/model_hub/page-cde2fb783e81a6c1.js"],"default",1]
|
||||
4:I[4707,[],""]
|
||||
5:I[36423,[],""]
|
||||
0:["CRVaojwLOuPGzfRFX3bIE",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/a33d9ae33620c3b8.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
||||
0:["u3E41CAVE1NTuNPVcBvVa",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/be22292d8ac48764.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
2:I[19107,[],"ClientPageRoot"]
|
||||
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-1cbed529ecb084e0.js","899","static/chunks/899-9af4feaf6f21839c.js","250","static/chunks/250-f21e8c1de1717077.js","461","static/chunks/app/onboarding/page-957334f0c5d5df41.js"],"default",1]
|
||||
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-59f99bfbf676f282.js","899","static/chunks/899-9af4feaf6f21839c.js","250","static/chunks/250-7b7f46d48724f856.js","461","static/chunks/app/onboarding/page-82b2525e758a7201.js"],"default",1]
|
||||
4:I[4707,[],""]
|
||||
5:I[36423,[],""]
|
||||
0:["CRVaojwLOuPGzfRFX3bIE",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/a33d9ae33620c3b8.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
||||
0:["u3E41CAVE1NTuNPVcBvVa",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/be22292d8ac48764.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -21,18 +21,11 @@ model_list:
|
|||
model: databricks/databricks-claude-3-7-sonnet
|
||||
api_key: os.environ/DATABRICKS_API_KEY
|
||||
api_base: os.environ/DATABRICKS_API_BASE
|
||||
- model_name: "gpt-4o-realtime-preview"
|
||||
- model_name: "gpt-4.1"
|
||||
litellm_params:
|
||||
model: azure/gpt-4o-realtime-preview-2
|
||||
model: azure/gpt-4.1
|
||||
api_key: os.environ/AZURE_API_KEY_REALTIME
|
||||
api_base: https://krris-m2f9a9i7-eastus2.openai.azure.com/
|
||||
model_info:
|
||||
base_model: azure/gpt-4o-realtime-preview-2024-10-01
|
||||
- model_name: "vertex_ai/gemini-1.5-pro-001"
|
||||
litellm_params:
|
||||
model: vertex_ai/gemini-1.5-pro-001
|
||||
vertex_credentials: {"project_id": "krris-m2f9a9i7", "location": "us-central1"}
|
||||
api_base: https://us-central1-aiplatform.googleapis.com/v1
|
||||
|
||||
litellm_settings:
|
||||
num_retries: 0
|
||||
|
|
|
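For context, a hedged sketch of calling the `gpt-4.1` deployment added to this config through the proxy with the OpenAI SDK. The base URL and API key below are placeholders, not values taken from this commit:

```python
from openai import OpenAI

# Hedged sketch: assumes the proxy loaded with the config above is running locally
# and that "sk-1234" is a placeholder virtual key.
client = OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

response = client.chat.completions.create(
    model="gpt-4.1",  # routed to azure/gpt-4.1 per the model_list entry above
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)
```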
@ -287,6 +287,7 @@ class LiteLLMRoutes(enum.Enum):
|
|||
"/v1/models",
|
||||
# token counter
|
||||
"/utils/token_counter",
|
||||
"/utils/transform_request",
|
||||
# rerank
|
||||
"/rerank",
|
||||
"/v1/rerank",
|
||||
|
@ -462,6 +463,7 @@ class LiteLLMRoutes(enum.Enum):
|
|||
"/team/member_delete",
|
||||
"/team/permissions_list",
|
||||
"/team/permissions_update",
|
||||
"/team/daily/activity",
|
||||
"/model/new",
|
||||
"/model/update",
|
||||
"/model/delete",
|
||||
|
@ -667,6 +669,7 @@ class KeyRequestBase(GenerateRequestBase):
|
|||
budget_id: Optional[str] = None
|
||||
tags: Optional[List[str]] = None
|
||||
enforced_params: Optional[List[str]] = None
|
||||
allowed_routes: Optional[list] = []
|
||||
|
||||
|
||||
class GenerateKeyRequest(KeyRequestBase):
|
||||
|
@ -816,6 +819,8 @@ class NewUserResponse(GenerateKeyResponse):
|
|||
teams: Optional[list] = None
|
||||
user_alias: Optional[str] = None
|
||||
model_max_budget: Optional[dict] = None
|
||||
created_at: Optional[datetime] = None
|
||||
updated_at: Optional[datetime] = None
|
||||
|
||||
|
||||
class UpdateUserRequest(GenerateRequestBase):
|
||||
|
@ -1144,6 +1149,7 @@ class LiteLLM_TeamTable(TeamBase):
|
|||
budget_reset_at: Optional[datetime] = None
|
||||
model_id: Optional[int] = None
|
||||
litellm_model_table: Optional[LiteLLM_ModelTable] = None
|
||||
updated_at: Optional[datetime] = None
|
||||
created_at: Optional[datetime] = None
|
||||
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
@ -1491,6 +1497,7 @@ class LiteLLM_VerificationToken(LiteLLMPydanticObjectBase):
|
|||
budget_duration: Optional[str] = None
|
||||
budget_reset_at: Optional[datetime] = None
|
||||
allowed_cache_controls: Optional[list] = []
|
||||
allowed_routes: Optional[list] = []
|
||||
permissions: Dict = {}
|
||||
model_spend: Dict = {}
|
||||
model_max_budget: Dict = {}
|
||||
|
|
|
@ -2,11 +2,11 @@
|
|||
## Common auth checks between jwt + key based auth
|
||||
"""
|
||||
Got Valid Token from Cache, DB
|
||||
Run checks for:
|
||||
Run checks for:
|
||||
|
||||
1. If user can call model
|
||||
2. If user is in budget
|
||||
3. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
|
||||
2. If user is in budget
|
||||
3. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
|
||||
"""
|
||||
import asyncio
|
||||
import re
|
||||
|
@ -270,6 +270,11 @@ def _is_api_route_allowed(
|
|||
if valid_token is None:
|
||||
raise Exception("Invalid proxy server token passed. valid_token=None.")
|
||||
|
||||
# Check if Virtual Key is allowed to call the route - Applies to all Roles
|
||||
RouteChecks.is_virtual_key_allowed_to_call_route(
|
||||
route=route, valid_token=valid_token
|
||||
)
|
||||
|
||||
if not _is_user_proxy_admin(user_obj=user_obj): # if non-admin
|
||||
RouteChecks.non_proxy_admin_allowed_routes_check(
|
||||
user_obj=user_obj,
|
||||
|
|
|
@ -16,6 +16,37 @@ from .auth_checks_organization import _user_is_org_admin
|
|||
|
||||
|
||||
class RouteChecks:
|
||||
@staticmethod
|
||||
def is_virtual_key_allowed_to_call_route(
|
||||
route: str, valid_token: UserAPIKeyAuth
|
||||
) -> bool:
|
||||
"""
|
||||
Raises Exception if Virtual Key is not allowed to call the route
|
||||
"""
|
||||
|
||||
# Only check if valid_token.allowed_routes is set and is a list with at least one item
|
||||
if valid_token.allowed_routes is None:
|
||||
return True
|
||||
if not isinstance(valid_token.allowed_routes, list):
|
||||
return True
|
||||
if len(valid_token.allowed_routes) == 0:
|
||||
return True
|
||||
|
||||
# explicit check for allowed routes
|
||||
if route in valid_token.allowed_routes:
|
||||
return True
|
||||
|
||||
# check if wildcard pattern is allowed
|
||||
for allowed_route in valid_token.allowed_routes:
|
||||
if RouteChecks._route_matches_wildcard_pattern(
|
||||
route=route, pattern=allowed_route
|
||||
):
|
||||
return True
|
||||
|
||||
raise Exception(
|
||||
f"Virtual key is not allowed to call this route. Only allowed to call routes: {valid_token.allowed_routes}. Tried to call route: {route}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def non_proxy_admin_allowed_routes_check(
|
||||
user_obj: Optional[LiteLLM_UserTable],
|
||||
|
@ -220,6 +251,35 @@ class RouteChecks:
|
|||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _route_matches_wildcard_pattern(route: str, pattern: str) -> bool:
|
||||
"""
|
||||
Check if route matches the wildcard pattern
|
||||
|
||||
eg.
|
||||
|
||||
pattern: "/scim/v2/*"
|
||||
route: "/scim/v2/Users"
|
||||
- returns: True
|
||||
|
||||
pattern: "/scim/v2/*"
|
||||
route: "/chat/completions"
|
||||
- returns: False
|
||||
|
||||
|
||||
pattern: "/scim/v2/*"
|
||||
route: "/scim/v2/Users/123"
|
||||
- returns: True
|
||||
|
||||
"""
|
||||
if pattern.endswith("*"):
|
||||
# Get the prefix (everything before the wildcard)
|
||||
prefix = pattern[:-1]
|
||||
return route.startswith(prefix)
|
||||
else:
|
||||
# If there's no wildcard, the pattern and route should match exactly
|
||||
return route == pattern
|
||||
|
||||
@staticmethod
|
||||
def check_route_access(route: str, allowed_routes: List[str]) -> bool:
|
||||
"""
|
||||
|
|
|
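For reference, a minimal standalone sketch of the wildcard matching added in this hunk (re-implemented locally rather than imported from the proxy, so the function name is illustrative):

```python
def route_matches_wildcard_pattern(route: str, pattern: str) -> bool:
    # Mirrors RouteChecks._route_matches_wildcard_pattern from the diff above:
    # a trailing "*" turns the pattern into a prefix match; otherwise require exact equality.
    if pattern.endswith("*"):
        return route.startswith(pattern[:-1])
    return route == pattern

assert route_matches_wildcard_pattern("/scim/v2/Users", "/scim/v2/*")
assert route_matches_wildcard_pattern("/scim/v2/Users/123", "/scim/v2/*")
assert not route_matches_wildcard_pattern("/chat/completions", "/scim/v2/*")
assert route_matches_wildcard_pattern("/key/generate", "/key/generate")
```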
@ -433,14 +433,13 @@ class LiteLLMProxyRequestSetup:
|
|||
) -> Optional[List[str]]:
|
||||
tags = None
|
||||
|
||||
if llm_router and llm_router.enable_tag_filtering is True:
|
||||
# Check request headers for tags
|
||||
if "x-litellm-tags" in headers:
|
||||
if isinstance(headers["x-litellm-tags"], str):
|
||||
_tags = headers["x-litellm-tags"].split(",")
|
||||
tags = [tag.strip() for tag in _tags]
|
||||
elif isinstance(headers["x-litellm-tags"], list):
|
||||
tags = headers["x-litellm-tags"]
|
||||
# Check request headers for tags
|
||||
if "x-litellm-tags" in headers:
|
||||
if isinstance(headers["x-litellm-tags"], str):
|
||||
_tags = headers["x-litellm-tags"].split(",")
|
||||
tags = [tag.strip() for tag in _tags]
|
||||
elif isinstance(headers["x-litellm-tags"], list):
|
||||
tags = headers["x-litellm-tags"]
|
||||
# Check request body for tags
|
||||
if "tags" in data and isinstance(data["tags"], list):
|
||||
tags = data["tags"]
|
||||
|
|
|
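As a usage sketch for the tag extraction above, request tags can be sent in the `x-litellm-tags` header or in the request body `tags` field. The base URL, key, and model name below are placeholders, not values from this commit:

```python
import requests

# Hedged sketch: assumes a locally running proxy and a placeholder virtual key.
resp = requests.post(
    "http://localhost:4000/chat/completions",
    headers={
        "Authorization": "Bearer sk-1234",  # placeholder virtual key
        "x-litellm-tags": "teamA, prod",    # comma-separated; whitespace is stripped per the code above
    },
    json={
        "model": "gpt-4.1",
        "messages": [{"role": "user", "content": "hello"}],
        # Alternatively: "tags": ["teamA", "prod"]  # body-level tags are also read
    },
)
print(resp.json())
```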
@ -1,5 +1,5 @@
|
|||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
from typing import Any, Dict, List, Optional, Set, Union
|
||||
|
||||
from fastapi import HTTPException, status
|
||||
|
||||
|
@ -39,6 +39,7 @@ def update_breakdown_metrics(
|
|||
provider_metadata: Dict[str, Dict[str, Any]],
|
||||
api_key_metadata: Dict[str, Dict[str, Any]],
|
||||
entity_id_field: Optional[str] = None,
|
||||
entity_metadata_field: Optional[Dict[str, dict]] = None,
|
||||
) -> BreakdownMetrics:
|
||||
"""Updates breakdown metrics for a single record using the existing update_metrics function"""
|
||||
|
||||
|
@ -74,7 +75,8 @@ def update_breakdown_metrics(
|
|||
metadata=KeyMetadata(
|
||||
key_alias=api_key_metadata.get(record.api_key, {}).get(
|
||||
"key_alias", None
|
||||
)
|
||||
),
|
||||
team_id=api_key_metadata.get(record.api_key, {}).get("team_id", None),
|
||||
), # Add any api_key-specific metadata here
|
||||
)
|
||||
breakdown.api_keys[record.api_key].metrics = update_metrics(
|
||||
|
@ -87,7 +89,10 @@ def update_breakdown_metrics(
|
|||
if entity_value:
|
||||
if entity_value not in breakdown.entities:
|
||||
breakdown.entities[entity_value] = MetricWithMetadata(
|
||||
metrics=SpendMetrics(), metadata={}
|
||||
metrics=SpendMetrics(),
|
||||
metadata=entity_metadata_field.get(entity_value, {})
|
||||
if entity_metadata_field
|
||||
else {},
|
||||
)
|
||||
breakdown.entities[entity_value].metrics = update_metrics(
|
||||
breakdown.entities[entity_value].metrics, record
|
||||
|
@ -96,17 +101,32 @@ def update_breakdown_metrics(
|
|||
return breakdown
|
||||
|
||||
|
||||
async def get_api_key_metadata(
|
||||
prisma_client: PrismaClient,
|
||||
api_keys: Set[str],
|
||||
) -> Dict[str, Dict[str, Any]]:
|
||||
"""Update api key metadata for a single record."""
|
||||
key_records = await prisma_client.db.litellm_verificationtoken.find_many(
|
||||
where={"token": {"in": list(api_keys)}}
|
||||
)
|
||||
return {
|
||||
k.token: {"key_alias": k.key_alias, "team_id": k.team_id} for k in key_records
|
||||
}
|
||||
|
||||
|
||||
async def get_daily_activity(
|
||||
prisma_client: Optional[PrismaClient],
|
||||
table_name: str,
|
||||
entity_id_field: str,
|
||||
entity_id: Optional[Union[str, List[str]]],
|
||||
entity_metadata_field: Optional[Dict[str, dict]],
|
||||
start_date: Optional[str],
|
||||
end_date: Optional[str],
|
||||
model: Optional[str],
|
||||
api_key: Optional[str],
|
||||
page: int,
|
||||
page_size: int,
|
||||
exclude_entity_ids: Optional[List[str]] = None,
|
||||
) -> SpendAnalyticsPaginatedResponse:
|
||||
"""Common function to get daily activity for any entity type."""
|
||||
if prisma_client is None:
|
||||
|
@ -139,6 +159,10 @@ async def get_daily_activity(
|
|||
where_conditions[entity_id_field] = {"in": entity_id}
|
||||
else:
|
||||
where_conditions[entity_id_field] = entity_id
|
||||
if exclude_entity_ids:
|
||||
where_conditions.setdefault(entity_id_field, {})["not"] = {
|
||||
"in": exclude_entity_ids
|
||||
}
|
||||
|
||||
# Get total count for pagination
|
||||
total_count = await getattr(prisma_client.db, table_name).count(
|
||||
|
@ -166,12 +190,7 @@ async def get_daily_activity(
|
|||
model_metadata: Dict[str, Dict[str, Any]] = {}
|
||||
provider_metadata: Dict[str, Dict[str, Any]] = {}
|
||||
if api_keys:
|
||||
key_records = await prisma_client.db.litellm_verificationtoken.find_many(
|
||||
where={"token": {"in": list(api_keys)}}
|
||||
)
|
||||
api_key_metadata.update(
|
||||
{k.token: {"key_alias": k.key_alias} for k in key_records}
|
||||
)
|
||||
api_key_metadata = await get_api_key_metadata(prisma_client, api_keys)
|
||||
|
||||
# Process results
|
||||
results = []
|
||||
|
@ -198,6 +217,7 @@ async def get_daily_activity(
|
|||
provider_metadata,
|
||||
api_key_metadata,
|
||||
entity_id_field=entity_id_field,
|
||||
entity_metadata_field=entity_metadata_field,
|
||||
)
|
||||
|
||||
# Update total metrics
|
||||
|
|
|
@ -4,11 +4,19 @@ from litellm.proxy._types import (
|
|||
GenerateKeyRequest,
|
||||
LiteLLM_ManagementEndpoint_MetadataFields_Premium,
|
||||
LiteLLM_TeamTable,
|
||||
LitellmUserRoles,
|
||||
UserAPIKeyAuth,
|
||||
)
|
||||
from litellm.proxy.utils import _premium_user_check
|
||||
|
||||
|
||||
def _user_has_admin_view(user_api_key_dict: UserAPIKeyAuth) -> bool:
|
||||
return (
|
||||
user_api_key_dict.user_role == LitellmUserRoles.PROXY_ADMIN
|
||||
or user_api_key_dict.user_role == LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY
|
||||
)
|
||||
|
||||
|
||||
def _is_user_team_admin(
|
||||
user_api_key_dict: UserAPIKeyAuth, team_obj: LiteLLM_TeamTable
|
||||
) -> bool:
|
||||
|
|
|
@ -25,6 +25,8 @@ from litellm._logging import verbose_proxy_logger
|
|||
from litellm.litellm_core_utils.duration_parser import duration_in_seconds
|
||||
from litellm.proxy._types import *
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy.management_endpoints.common_daily_activity import get_daily_activity
|
||||
from litellm.proxy.management_endpoints.common_utils import _user_has_admin_view
|
||||
from litellm.proxy.management_endpoints.key_management_endpoints import (
|
||||
generate_key_helper_fn,
|
||||
prepare_metadata_fields,
|
||||
|
@ -34,8 +36,6 @@ from litellm.proxy.management_helpers.utils import management_endpoint_wrapper
|
|||
from litellm.proxy.utils import handle_exception_on_proxy
|
||||
from litellm.types.proxy.management_endpoints.common_daily_activity import (
|
||||
BreakdownMetrics,
|
||||
DailySpendData,
|
||||
DailySpendMetadata,
|
||||
KeyMetadata,
|
||||
KeyMetricWithMetadata,
|
||||
LiteLLM_DailyUserSpend,
|
||||
|
@ -1382,136 +1382,22 @@ async def get_user_daily_activity(
|
|||
)
|
||||
|
||||
try:
|
||||
# Build filter conditions
|
||||
where_conditions: Dict[str, Any] = {
|
||||
"date": {
|
||||
"gte": start_date,
|
||||
"lte": end_date,
|
||||
}
|
||||
}
|
||||
entity_id: Optional[str] = None
|
||||
if not _user_has_admin_view(user_api_key_dict):
|
||||
entity_id = user_api_key_dict.user_id
|
||||
|
||||
if model:
|
||||
where_conditions["model"] = model
|
||||
if api_key:
|
||||
where_conditions["api_key"] = api_key
|
||||
|
||||
if (
|
||||
user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
|
||||
and user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY
|
||||
):
|
||||
where_conditions[
|
||||
"user_id"
|
||||
] = user_api_key_dict.user_id # only allow access to own data
|
||||
|
||||
# Get total count for pagination
|
||||
total_count = await prisma_client.db.litellm_dailyuserspend.count(
|
||||
where=where_conditions
|
||||
)
|
||||
|
||||
# Fetch paginated results
|
||||
daily_spend_data = await prisma_client.db.litellm_dailyuserspend.find_many(
|
||||
where=where_conditions,
|
||||
order=[
|
||||
{"date": "desc"},
|
||||
],
|
||||
skip=(page - 1) * page_size,
|
||||
take=page_size,
|
||||
)
|
||||
|
||||
daily_spend_data_pydantic_list = [
|
||||
LiteLLM_DailyUserSpend(**record.model_dump()) for record in daily_spend_data
|
||||
]
|
||||
|
||||
# Get all unique API keys from the spend data
|
||||
api_keys = set()
|
||||
for record in daily_spend_data_pydantic_list:
|
||||
if record.api_key:
|
||||
api_keys.add(record.api_key)
|
||||
|
||||
# Fetch key aliases in bulk
|
||||
|
||||
api_key_metadata: Dict[str, Dict[str, Any]] = {}
|
||||
model_metadata: Dict[str, Dict[str, Any]] = {}
|
||||
provider_metadata: Dict[str, Dict[str, Any]] = {}
|
||||
if api_keys:
|
||||
key_records = await prisma_client.db.litellm_verificationtoken.find_many(
|
||||
where={"token": {"in": list(api_keys)}}
|
||||
)
|
||||
api_key_metadata.update(
|
||||
{k.token: {"key_alias": k.key_alias} for k in key_records}
|
||||
)
|
||||
# Process results
|
||||
results = []
|
||||
total_metrics = SpendMetrics()
|
||||
|
||||
# Group data by date and other dimensions
|
||||
|
||||
grouped_data: Dict[str, Dict[str, Any]] = {}
|
||||
for record in daily_spend_data_pydantic_list:
|
||||
date_str = record.date
|
||||
if date_str not in grouped_data:
|
||||
grouped_data[date_str] = {
|
||||
"metrics": SpendMetrics(),
|
||||
"breakdown": BreakdownMetrics(),
|
||||
}
|
||||
|
||||
# Update metrics
|
||||
grouped_data[date_str]["metrics"] = update_metrics(
|
||||
grouped_data[date_str]["metrics"], record
|
||||
)
|
||||
# Update breakdowns
|
||||
grouped_data[date_str]["breakdown"] = update_breakdown_metrics(
|
||||
grouped_data[date_str]["breakdown"],
|
||||
record,
|
||||
model_metadata,
|
||||
provider_metadata,
|
||||
api_key_metadata,
|
||||
)
|
||||
|
||||
# Update total metrics
|
||||
total_metrics.spend += record.spend
|
||||
total_metrics.prompt_tokens += record.prompt_tokens
|
||||
total_metrics.completion_tokens += record.completion_tokens
|
||||
total_metrics.total_tokens += (
|
||||
record.prompt_tokens + record.completion_tokens
|
||||
)
|
||||
total_metrics.cache_read_input_tokens += record.cache_read_input_tokens
|
||||
total_metrics.cache_creation_input_tokens += (
|
||||
record.cache_creation_input_tokens
|
||||
)
|
||||
total_metrics.api_requests += record.api_requests
|
||||
total_metrics.successful_requests += record.successful_requests
|
||||
total_metrics.failed_requests += record.failed_requests
|
||||
|
||||
# Convert grouped data to response format
|
||||
for date_str, data in grouped_data.items():
|
||||
results.append(
|
||||
DailySpendData(
|
||||
date=datetime.strptime(date_str, "%Y-%m-%d").date(),
|
||||
metrics=data["metrics"],
|
||||
breakdown=data["breakdown"],
|
||||
)
|
||||
)
|
||||
|
||||
# Sort results by date
|
||||
results.sort(key=lambda x: x.date, reverse=True)
|
||||
|
||||
return SpendAnalyticsPaginatedResponse(
|
||||
results=results,
|
||||
metadata=DailySpendMetadata(
|
||||
total_spend=total_metrics.spend,
|
||||
total_prompt_tokens=total_metrics.prompt_tokens,
|
||||
total_completion_tokens=total_metrics.completion_tokens,
|
||||
total_tokens=total_metrics.total_tokens,
|
||||
total_api_requests=total_metrics.api_requests,
|
||||
total_successful_requests=total_metrics.successful_requests,
|
||||
total_failed_requests=total_metrics.failed_requests,
|
||||
total_cache_read_input_tokens=total_metrics.cache_read_input_tokens,
|
||||
total_cache_creation_input_tokens=total_metrics.cache_creation_input_tokens,
|
||||
page=page,
|
||||
total_pages=-(-total_count // page_size), # Ceiling division
|
||||
has_more=(page * page_size) < total_count,
|
||||
),
|
||||
return await get_daily_activity(
|
||||
prisma_client=prisma_client,
|
||||
table_name="litellm_dailyuserspend",
|
||||
entity_id_field="user_id",
|
||||
entity_id=entity_id,
|
||||
entity_metadata_field=None,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
model=model,
|
||||
api_key=api_key,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
|
|
@ -372,7 +372,7 @@ async def generate_key_fn( # noqa: PLR0915
|
|||
- soft_budget: Optional[float] - Specify soft budget for a given key. Will trigger a slack alert when this soft budget is reached.
|
||||
- tags: Optional[List[str]] - Tags for [tracking spend](https://litellm.vercel.app/docs/proxy/enterprise#tracking-spend-for-custom-tags) and/or doing [tag-based routing](https://litellm.vercel.app/docs/proxy/tag_routing).
|
||||
- enforced_params: Optional[List[str]] - List of enforced params for the key (Enterprise only). [Docs](https://docs.litellm.ai/docs/proxy/enterprise#enforce-required-params-for-llm-requests)
|
||||
|
||||
- allowed_routes: Optional[list] - List of allowed routes for the key. Store the actual route or store a wildcard pattern for a set of routes. Example - ["/chat/completions", "/embeddings", "/keys/*"]
|
||||
Examples:
|
||||
|
||||
1. Allow users to turn on/off pii masking
|
||||
|
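To illustrate the `allowed_routes` parameter documented in this hunk, a hedged example call to `/key/generate` (the base URL and admin key are placeholders):

```python
import requests

# Hedged sketch: assumes a locally running proxy and a placeholder admin key.
resp = requests.post(
    "http://localhost:4000/key/generate",
    headers={"Authorization": "Bearer sk-1234"},  # placeholder admin key
    json={
        # The generated key may only call chat completions and SCIM routes (wildcard pattern).
        "allowed_routes": ["/chat/completions", "/scim/v2/*"],
    },
)
print(resp.json())
```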
@ -577,9 +577,9 @@ async def generate_key_fn( # noqa: PLR0915
|
|||
request_type="key", **data_json, table_name="key"
|
||||
)
|
||||
|
||||
response[
|
||||
"soft_budget"
|
||||
] = data.soft_budget # include the user-input soft budget in the response
|
||||
response["soft_budget"] = (
|
||||
data.soft_budget
|
||||
) # include the user-input soft budget in the response
|
||||
|
||||
response = GenerateKeyResponse(**response)
|
||||
|
||||
|
@ -723,6 +723,7 @@ async def update_key_fn(
|
|||
- config: Optional[dict] - [DEPRECATED PARAM] Key-specific config.
|
||||
- temp_budget_increase: Optional[float] - Temporary budget increase for the key (Enterprise only).
|
||||
- temp_budget_expiry: Optional[str] - Expiry time for the temporary budget increase (Enterprise only).
|
||||
- allowed_routes: Optional[list] - List of allowed routes for the key. Store the actual route or store a wildcard pattern for a set of routes. Example - ["/chat/completions", "/embeddings", "/keys/*"]
|
||||
|
||||
Example:
|
||||
```bash
|
||||
|
@ -1167,6 +1168,7 @@ async def generate_key_helper_fn( # noqa: PLR0915
|
|||
send_invite_email: Optional[bool] = None,
|
||||
created_by: Optional[str] = None,
|
||||
updated_by: Optional[str] = None,
|
||||
allowed_routes: Optional[list] = None,
|
||||
):
|
||||
from litellm.proxy.proxy_server import (
|
||||
litellm_proxy_budget_name,
|
||||
|
@ -1272,6 +1274,7 @@ async def generate_key_helper_fn( # noqa: PLR0915
|
|||
"blocked": blocked,
|
||||
"created_by": created_by,
|
||||
"updated_by": updated_by,
|
||||
"allowed_routes": allowed_routes or [],
|
||||
}
|
||||
|
||||
if (
|
||||
|
@ -1467,10 +1470,10 @@ async def delete_verification_tokens(
|
|||
try:
|
||||
if prisma_client:
|
||||
tokens = [_hash_token_if_needed(token=key) for key in tokens]
|
||||
_keys_being_deleted: List[
|
||||
LiteLLM_VerificationToken
|
||||
] = await prisma_client.db.litellm_verificationtoken.find_many(
|
||||
where={"token": {"in": tokens}}
|
||||
_keys_being_deleted: List[LiteLLM_VerificationToken] = (
|
||||
await prisma_client.db.litellm_verificationtoken.find_many(
|
||||
where={"token": {"in": tokens}}
|
||||
)
|
||||
)
|
||||
|
||||
# Assuming 'db' is your Prisma Client instance
|
||||
|
@ -1572,9 +1575,9 @@ async def _rotate_master_key(
|
|||
from litellm.proxy.proxy_server import proxy_config
|
||||
|
||||
try:
|
||||
models: Optional[
|
||||
List
|
||||
] = await prisma_client.db.litellm_proxymodeltable.find_many()
|
||||
models: Optional[List] = (
|
||||
await prisma_client.db.litellm_proxymodeltable.find_many()
|
||||
)
|
||||
except Exception:
|
||||
models = None
|
||||
# 2. process model table
|
||||
|
@ -1861,11 +1864,11 @@ async def validate_key_list_check(
|
|||
param="user_id",
|
||||
code=status.HTTP_403_FORBIDDEN,
|
||||
)
|
||||
complete_user_info_db_obj: Optional[
|
||||
BaseModel
|
||||
] = await prisma_client.db.litellm_usertable.find_unique(
|
||||
where={"user_id": user_api_key_dict.user_id},
|
||||
include={"organization_memberships": True},
|
||||
complete_user_info_db_obj: Optional[BaseModel] = (
|
||||
await prisma_client.db.litellm_usertable.find_unique(
|
||||
where={"user_id": user_api_key_dict.user_id},
|
||||
include={"organization_memberships": True},
|
||||
)
|
||||
)
|
||||
|
||||
if complete_user_info_db_obj is None:
|
||||
|
@ -1926,10 +1929,10 @@ async def get_admin_team_ids(
|
|||
if complete_user_info is None:
|
||||
return []
|
||||
# Get all teams that user is an admin of
|
||||
teams: Optional[
|
||||
List[BaseModel]
|
||||
] = await prisma_client.db.litellm_teamtable.find_many(
|
||||
where={"team_id": {"in": complete_user_info.teams}}
|
||||
teams: Optional[List[BaseModel]] = (
|
||||
await prisma_client.db.litellm_teamtable.find_many(
|
||||
where={"team_id": {"in": complete_user_info.teams}}
|
||||
)
|
||||
)
|
||||
if teams is None:
|
||||
return []
|
||||
|
|
118
litellm/proxy/management_endpoints/scim/README_SCIM.md
Normal file
|
@ -0,0 +1,118 @@
|
|||
# SCIM v2 Integration for LiteLLM Proxy
|
||||
|
||||
This module provides SCIM v2 (System for Cross-domain Identity Management) endpoints for LiteLLM Proxy, allowing identity providers to manage users and teams (groups) within the LiteLLM ecosystem.
|
||||
|
||||
## Overview
|
||||
|
||||
SCIM is an open standard designed to simplify user management across different systems. This implementation allows compatible identity providers (like Okta, Azure AD, OneLogin, etc.) to automatically provision and deprovision users and groups in LiteLLM Proxy.
|
||||
|
||||
## Endpoints
|
||||
|
||||
The SCIM v2 API follows the standard specification with the following base URL:
|
||||
|
||||
```
|
||||
/scim/v2
|
||||
```
|
||||
|
||||
### User Management
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `/Users` | GET | List all users with pagination support |
|
||||
| `/Users/{user_id}` | GET | Get a specific user by ID |
|
||||
| `/Users` | POST | Create a new user |
|
||||
| `/Users/{user_id}` | PUT | Update an existing user |
|
||||
| `/Users/{user_id}` | DELETE | Delete a user |
|
||||
|
||||
### Group Management
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `/Groups` | GET | List all groups with pagination support |
|
||||
| `/Groups/{group_id}` | GET | Get a specific group by ID |
|
||||
| `/Groups` | POST | Create a new group |
|
||||
| `/Groups/{group_id}` | PUT | Update an existing group |
|
||||
| `/Groups/{group_id}` | DELETE | Delete a group |
|
||||
|
||||
## SCIM Schema
|
||||
|
||||
This implementation follows the standard SCIM v2 schema with the following mappings:
|
||||
|
||||
### Users
|
||||
|
||||
- SCIM User ID → LiteLLM `user_id`
|
||||
- SCIM User Email → LiteLLM `user_email`
|
||||
- SCIM User Group Memberships → LiteLLM User-Team relationships
|
||||
|
||||
### Groups
|
||||
|
||||
- SCIM Group ID → LiteLLM `team_id`
|
||||
- SCIM Group Display Name → LiteLLM `team_alias`
|
||||
- SCIM Group Members → LiteLLM Team members list
|
||||
|
||||
## Configuration
|
||||
|
||||
To enable SCIM in your identity provider, use the full URL to the SCIM endpoint:
|
||||
|
||||
```
|
||||
https://your-litellm-proxy-url/scim/v2
|
||||
```
|
||||
|
||||
Most identity providers will require authentication. You should use a valid LiteLLM API key with administrative privileges.
|
||||
|
||||
## Features
|
||||
|
||||
- Full CRUD operations for users and groups
|
||||
- Pagination support
|
||||
- Basic filtering support
|
||||
- Automatic synchronization of user-team relationships
|
||||
- Proper status codes and error handling per SCIM specification
|
||||
|
||||
|
||||
## Example Usage
|
||||
|
||||
### Listing Users
|
||||
|
||||
```
|
||||
GET /scim/v2/Users?startIndex=1&count=10
|
||||
```
|
||||
|
||||
### Creating a User
|
||||
|
||||
```json
|
||||
POST /scim/v2/Users
|
||||
{
|
||||
"schemas": ["urn:ietf:params:scim:schemas:core:2.0:User"],
|
||||
"userName": "john.doe@example.com",
|
||||
"active": true,
|
||||
"emails": [
|
||||
{
|
||||
"value": "john.doe@example.com",
|
||||
"primary": true
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Adding a User to Groups
|
||||
|
||||
```json
|
||||
PUT /scim/v2/Users/{user_id}
|
||||
{
|
||||
"schemas": ["urn:ietf:params:scim:schemas:core:2.0:User"],
|
||||
"userName": "john.doe@example.com",
|
||||
"active": true,
|
||||
"emails": [
|
||||
{
|
||||
"value": "john.doe@example.com",
|
||||
"primary": true
|
||||
}
|
||||
],
|
||||
"groups": [
|
||||
{
|
||||
"value": "team-123",
|
||||
"display": "Engineering Team"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
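The README documents the endpoints in raw HTTP form. As a sketch of how a client could drive them from Python - assuming a proxy at `http://localhost:4000` and an admin-scoped virtual key, both of which are placeholders - the same list and create operations look like this:

```python
import requests

# Placeholder proxy URL and admin-scoped LiteLLM key - replace with your own values
LITELLM_PROXY_BASE_URL = "http://localhost:4000"
LITELLM_API_KEY = "sk-1234"

headers = {
    "Authorization": f"Bearer {LITELLM_API_KEY}",
    "Content-Type": "application/scim+json",
}

# List the first 10 users
resp = requests.get(
    f"{LITELLM_PROXY_BASE_URL}/scim/v2/Users",
    params={"startIndex": 1, "count": 10},
    headers=headers,
)
print(resp.json()["totalResults"])

# Create a user keyed by email
new_user = {
    "schemas": ["urn:ietf:params:scim:schemas:core:2.0:User"],
    "userName": "john.doe@example.com",
    "active": True,
    "emails": [{"value": "john.doe@example.com", "primary": True}],
}
resp = requests.post(
    f"{LITELLM_PROXY_BASE_URL}/scim/v2/Users", json=new_user, headers=headers
)
print(resp.status_code)  # 201 on success, 409 if the user already exists
```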
154  litellm/proxy/management_endpoints/scim/scim_transformations.py  (new file)
@@ -0,0 +1,154 @@
from typing import List, Union

from litellm.proxy._types import (
    LiteLLM_TeamTable,
    LiteLLM_UserTable,
    Member,
    NewUserResponse,
)
from litellm.types.proxy.management_endpoints.scim_v2 import *


class ScimTransformations:
    DEFAULT_SCIM_NAME = "Unknown User"
    DEFAULT_SCIM_FAMILY_NAME = "Unknown Family Name"
    DEFAULT_SCIM_DISPLAY_NAME = "Unknown Display Name"
    DEFAULT_SCIM_MEMBER_VALUE = "Unknown Member Value"

    @staticmethod
    async def transform_litellm_user_to_scim_user(
        user: Union[LiteLLM_UserTable, NewUserResponse],
    ) -> SCIMUser:
        from litellm.proxy.proxy_server import prisma_client

        if prisma_client is None:
            raise HTTPException(
                status_code=500, detail={"error": "No database connected"}
            )

        # Get user's teams/groups
        groups = []
        for team_id in user.teams or []:
            team = await prisma_client.db.litellm_teamtable.find_unique(
                where={"team_id": team_id}
            )
            if team:
                team_alias = getattr(team, "team_alias", team.team_id)
                groups.append(SCIMUserGroup(value=team.team_id, display=team_alias))

        user_created_at = user.created_at.isoformat() if user.created_at else None
        user_updated_at = user.updated_at.isoformat() if user.updated_at else None

        emails = []
        if user.user_email:
            emails.append(SCIMUserEmail(value=user.user_email, primary=True))

        return SCIMUser(
            schemas=["urn:ietf:params:scim:schemas:core:2.0:User"],
            id=user.user_id,
            userName=ScimTransformations._get_scim_user_name(user),
            displayName=ScimTransformations._get_scim_user_name(user),
            name=SCIMUserName(
                familyName=ScimTransformations._get_scim_family_name(user),
                givenName=ScimTransformations._get_scim_given_name(user),
            ),
            emails=emails,
            groups=groups,
            active=True,
            meta={
                "resourceType": "User",
                "created": user_created_at,
                "lastModified": user_updated_at,
            },
        )

    @staticmethod
    def _get_scim_user_name(user: Union[LiteLLM_UserTable, NewUserResponse]) -> str:
        """
        SCIM requires a display name with length > 0

        We use the same userName and displayName for SCIM users
        """
        if user.user_email and len(user.user_email) > 0:
            return user.user_email
        return ScimTransformations.DEFAULT_SCIM_DISPLAY_NAME

    @staticmethod
    def _get_scim_family_name(user: Union[LiteLLM_UserTable, NewUserResponse]) -> str:
        """
        SCIM requires a family name with length > 0
        """
        metadata = user.metadata or {}
        if "scim_metadata" in metadata:
            scim_metadata: LiteLLM_UserScimMetadata = LiteLLM_UserScimMetadata(
                **metadata["scim_metadata"]
            )
            if scim_metadata.familyName and len(scim_metadata.familyName) > 0:
                return scim_metadata.familyName

        if user.user_alias and len(user.user_alias) > 0:
            return user.user_alias
        return ScimTransformations.DEFAULT_SCIM_FAMILY_NAME

    @staticmethod
    def _get_scim_given_name(user: Union[LiteLLM_UserTable, NewUserResponse]) -> str:
        """
        SCIM requires a given name with length > 0
        """
        metadata = user.metadata or {}
        if "scim_metadata" in metadata:
            scim_metadata: LiteLLM_UserScimMetadata = LiteLLM_UserScimMetadata(
                **metadata["scim_metadata"]
            )
            if scim_metadata.givenName and len(scim_metadata.givenName) > 0:
                return scim_metadata.givenName

        if user.user_alias and len(user.user_alias) > 0:
            return user.user_alias or ScimTransformations.DEFAULT_SCIM_NAME
        return ScimTransformations.DEFAULT_SCIM_NAME

    @staticmethod
    async def transform_litellm_team_to_scim_group(
        team: Union[LiteLLM_TeamTable, dict],
    ) -> SCIMGroup:
        from litellm.proxy.proxy_server import prisma_client

        if prisma_client is None:
            raise HTTPException(
                status_code=500, detail={"error": "No database connected"}
            )

        if isinstance(team, dict):
            team = LiteLLM_TeamTable(**team)

        # Get team members
        scim_members: List[SCIMMember] = []
        for member in team.members_with_roles or []:
            scim_members.append(
                SCIMMember(
                    value=ScimTransformations._get_scim_member_value(member),
                    display=member.user_email,
                )
            )

        team_alias = getattr(team, "team_alias", team.team_id)
        team_created_at = team.created_at.isoformat() if team.created_at else None
        team_updated_at = team.updated_at.isoformat() if team.updated_at else None

        return SCIMGroup(
            schemas=["urn:ietf:params:scim:schemas:core:2.0:Group"],
            id=team.team_id,
            displayName=team_alias,
            members=scim_members,
            meta={
                "resourceType": "Group",
                "created": team_created_at,
                "lastModified": team_updated_at,
            },
        )

    @staticmethod
    def _get_scim_member_value(member: Member) -> str:
        if member.user_email:
            return member.user_email
        return ScimTransformations.DEFAULT_SCIM_MEMBER_VALUE
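The given/family-name helpers above fall back through stored `scim_metadata`, then `user_alias`, then a non-empty default (SCIM does not allow empty names). A minimal standalone sketch of that precedence, using plain values rather than the LiteLLM user models:

```python
from typing import Optional

DEFAULT_SCIM_NAME = "Unknown User"


def resolve_given_name(user_alias: Optional[str], scim_metadata: Optional[dict]) -> str:
    # Mirrors the precedence in _get_scim_given_name: stored SCIM metadata first,
    # then the LiteLLM user_alias, then a non-empty default required by SCIM.
    if scim_metadata and scim_metadata.get("givenName"):
        return scim_metadata["givenName"]
    if user_alias:
        return user_alias
    return DEFAULT_SCIM_NAME


assert resolve_given_name(None, {"givenName": "John"}) == "John"
assert resolve_given_name("jdoe", None) == "jdoe"
assert resolve_given_name(None, None) == DEFAULT_SCIM_NAME
```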
757  litellm/proxy/management_endpoints/scim/scim_v2.py  (new file)
@@ -0,0 +1,757 @@
|
|||
"""
|
||||
SCIM v2 Endpoints for LiteLLM Proxy using Internal User/Team Management
|
||||
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from typing import List, Optional
|
||||
|
||||
from fastapi import (
|
||||
APIRouter,
|
||||
Body,
|
||||
Depends,
|
||||
HTTPException,
|
||||
Path,
|
||||
Query,
|
||||
Request,
|
||||
Response,
|
||||
)
|
||||
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy._types import (
|
||||
LiteLLM_UserTable,
|
||||
LitellmUserRoles,
|
||||
Member,
|
||||
NewTeamRequest,
|
||||
NewUserRequest,
|
||||
UserAPIKeyAuth,
|
||||
)
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy.management_endpoints.internal_user_endpoints import new_user
|
||||
from litellm.proxy.management_endpoints.scim.scim_transformations import (
|
||||
ScimTransformations,
|
||||
)
|
||||
from litellm.proxy.management_endpoints.team_endpoints import new_team
|
||||
from litellm.types.proxy.management_endpoints.scim_v2 import *
|
||||
|
||||
scim_router = APIRouter(
|
||||
prefix="/scim/v2",
|
||||
tags=["SCIM v2"],
|
||||
)
|
||||
|
||||
|
||||
# Dependency to set the correct SCIM Content-Type
|
||||
async def set_scim_content_type(response: Response):
|
||||
"""Sets the Content-Type header to application/scim+json"""
|
||||
# Check if content type is already application/json, only override in that case
|
||||
# Avoids overriding for non-JSON responses or already correct types if they were set manually
|
||||
response.headers["Content-Type"] = "application/scim+json"
|
||||
|
||||
|
||||
# User Endpoints
|
||||
@scim_router.get(
|
||||
"/Users",
|
||||
response_model=SCIMListResponse,
|
||||
status_code=200,
|
||||
dependencies=[Depends(user_api_key_auth), Depends(set_scim_content_type)],
|
||||
)
|
||||
async def get_users(
|
||||
startIndex: int = Query(1, ge=1),
|
||||
count: int = Query(10, ge=1, le=100),
|
||||
filter: Optional[str] = Query(None),
|
||||
):
|
||||
"""
|
||||
Get a list of users according to SCIM v2 protocol
|
||||
"""
|
||||
from litellm.proxy.proxy_server import prisma_client
|
||||
|
||||
if prisma_client is None:
|
||||
raise HTTPException(status_code=500, detail={"error": "No database connected"})
|
||||
|
||||
try:
|
||||
# Parse filter if provided (basic support)
|
||||
where_conditions = {}
|
||||
if filter:
|
||||
# Very basic filter support - only handling userName eq and emails.value eq
|
||||
if "userName eq" in filter:
|
||||
user_id = filter.split("userName eq ")[1].strip("\"'")
|
||||
where_conditions["user_id"] = user_id
|
||||
elif "emails.value eq" in filter:
|
||||
email = filter.split("emails.value eq ")[1].strip("\"'")
|
||||
where_conditions["user_email"] = email
|
||||
|
||||
# Get users from database
|
||||
users: List[LiteLLM_UserTable] = (
|
||||
await prisma_client.db.litellm_usertable.find_many(
|
||||
where=where_conditions,
|
||||
skip=(startIndex - 1),
|
||||
take=count,
|
||||
order={"created_at": "desc"},
|
||||
)
|
||||
)
|
||||
|
||||
# Get total count for pagination
|
||||
total_count = await prisma_client.db.litellm_usertable.count(
|
||||
where=where_conditions
|
||||
)
|
||||
|
||||
# Convert to SCIM format
|
||||
scim_users: List[SCIMUser] = []
|
||||
for user in users:
|
||||
scim_user = await ScimTransformations.transform_litellm_user_to_scim_user(
|
||||
user=user
|
||||
)
|
||||
scim_users.append(scim_user)
|
||||
|
||||
return SCIMListResponse(
|
||||
totalResults=total_count,
|
||||
startIndex=startIndex,
|
||||
itemsPerPage=min(count, len(scim_users)),
|
||||
Resources=scim_users,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail={"error": f"Error retrieving users: {str(e)}"}
|
||||
)
|
||||
|
||||
|
||||
@scim_router.get(
|
||||
"/Users/{user_id}",
|
||||
response_model=SCIMUser,
|
||||
status_code=200,
|
||||
dependencies=[Depends(user_api_key_auth), Depends(set_scim_content_type)],
|
||||
)
|
||||
async def get_user(
|
||||
user_id: str = Path(..., title="User ID"),
|
||||
):
|
||||
"""
|
||||
Get a single user by ID according to SCIM v2 protocol
|
||||
"""
|
||||
from litellm.proxy.proxy_server import prisma_client
|
||||
|
||||
if prisma_client is None:
|
||||
raise HTTPException(status_code=500, detail={"error": "No database connected"})
|
||||
|
||||
try:
|
||||
user = await prisma_client.db.litellm_usertable.find_unique(
|
||||
where={"user_id": user_id}
|
||||
)
|
||||
|
||||
if not user:
|
||||
raise HTTPException(
|
||||
status_code=404, detail={"error": f"User not found with ID: {user_id}"}
|
||||
)
|
||||
|
||||
# Convert to SCIM format
|
||||
scim_user = await ScimTransformations.transform_litellm_user_to_scim_user(user)
|
||||
return scim_user
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail={"error": f"Error retrieving user: {str(e)}"}
|
||||
)
|
||||
|
||||
|
||||
@scim_router.post(
|
||||
"/Users",
|
||||
response_model=SCIMUser,
|
||||
status_code=201,
|
||||
dependencies=[Depends(user_api_key_auth), Depends(set_scim_content_type)],
|
||||
)
|
||||
async def create_user(
|
||||
user: SCIMUser = Body(...),
|
||||
):
|
||||
"""
|
||||
Create a user according to SCIM v2 protocol
|
||||
"""
|
||||
from litellm.proxy.proxy_server import prisma_client
|
||||
|
||||
if prisma_client is None:
|
||||
raise HTTPException(status_code=500, detail={"error": "No database connected"})
|
||||
|
||||
try:
|
||||
verbose_proxy_logger.debug("SCIM CREATE USER request: %s", user)
|
||||
# Extract email from SCIM user
|
||||
user_email = None
|
||||
if user.emails and len(user.emails) > 0:
|
||||
user_email = user.emails[0].value
|
||||
|
||||
# Check if user already exists
|
||||
existing_user = None
|
||||
if user.userName:
|
||||
existing_user = await prisma_client.db.litellm_usertable.find_unique(
|
||||
where={"user_id": user.userName}
|
||||
)
|
||||
|
||||
if existing_user:
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail={"error": f"User already exists with username: {user.userName}"},
|
||||
)
|
||||
|
||||
# Create user in database
|
||||
user_id = user.userName or str(uuid.uuid4())
|
||||
created_user = await new_user(
|
||||
data=NewUserRequest(
|
||||
user_id=user_id,
|
||||
user_email=user_email,
|
||||
user_alias=user.name.givenName,
|
||||
teams=[group.value for group in user.groups] if user.groups else None,
|
||||
metadata={
|
||||
"scim_metadata": LiteLLM_UserScimMetadata(
|
||||
givenName=user.name.givenName,
|
||||
familyName=user.name.familyName,
|
||||
).model_dump()
|
||||
},
|
||||
auto_create_key=False,
|
||||
),
|
||||
)
|
||||
scim_user = await ScimTransformations.transform_litellm_user_to_scim_user(
|
||||
user=created_user
|
||||
)
|
||||
return scim_user
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail={"error": f"Error creating user: {str(e)}"}
|
||||
)
|
||||
|
||||
|
||||
@scim_router.put(
|
||||
"/Users/{user_id}",
|
||||
response_model=SCIMUser,
|
||||
status_code=200,
|
||||
dependencies=[Depends(user_api_key_auth), Depends(set_scim_content_type)],
|
||||
)
|
||||
async def update_user(
|
||||
user_id: str = Path(..., title="User ID"),
|
||||
user: SCIMUser = Body(...),
|
||||
):
|
||||
"""
|
||||
Update a user according to SCIM v2 protocol
|
||||
"""
|
||||
from litellm.proxy.proxy_server import prisma_client
|
||||
|
||||
if prisma_client is None:
|
||||
raise HTTPException(status_code=500, detail={"error": "No database connected"})
|
||||
try:
|
||||
return None
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail={"error": f"Error updating user: {str(e)}"}
|
||||
)
|
||||
|
||||
|
||||
@scim_router.delete(
|
||||
"/Users/{user_id}",
|
||||
status_code=204,
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def delete_user(
|
||||
user_id: str = Path(..., title="User ID"),
|
||||
):
|
||||
"""
|
||||
Delete a user according to SCIM v2 protocol
|
||||
"""
|
||||
from litellm.proxy.proxy_server import prisma_client
|
||||
|
||||
if prisma_client is None:
|
||||
raise HTTPException(status_code=500, detail={"error": "No database connected"})
|
||||
|
||||
try:
|
||||
# Check if user exists
|
||||
existing_user = await prisma_client.db.litellm_usertable.find_unique(
|
||||
where={"user_id": user_id}
|
||||
)
|
||||
|
||||
if not existing_user:
|
||||
raise HTTPException(
|
||||
status_code=404, detail={"error": f"User not found with ID: {user_id}"}
|
||||
)
|
||||
|
||||
# Get teams user belongs to
|
||||
teams = []
|
||||
if existing_user.teams:
|
||||
for team_id in existing_user.teams:
|
||||
team = await prisma_client.db.litellm_teamtable.find_unique(
|
||||
where={"team_id": team_id}
|
||||
)
|
||||
if team:
|
||||
teams.append(team)
|
||||
|
||||
# Remove user from all teams
|
||||
for team in teams:
|
||||
current_members = team.members or []
|
||||
if user_id in current_members:
|
||||
new_members = [m for m in current_members if m != user_id]
|
||||
await prisma_client.db.litellm_teamtable.update(
|
||||
where={"team_id": team.team_id}, data={"members": new_members}
|
||||
)
|
||||
|
||||
# Delete user
|
||||
await prisma_client.db.litellm_usertable.delete(where={"user_id": user_id})
|
||||
|
||||
return Response(status_code=204)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail={"error": f"Error deleting user: {str(e)}"}
|
||||
)
|
||||
|
||||
|
||||
@scim_router.patch(
|
||||
"/Users/{user_id}",
|
||||
response_model=SCIMUser,
|
||||
status_code=200,
|
||||
dependencies=[Depends(user_api_key_auth), Depends(set_scim_content_type)],
|
||||
)
|
||||
async def patch_user(
|
||||
user_id: str = Path(..., title="User ID"),
|
||||
patch_ops: SCIMPatchOp = Body(...),
|
||||
):
|
||||
"""
|
||||
Patch a user according to SCIM v2 protocol
|
||||
"""
|
||||
from litellm.proxy.proxy_server import prisma_client
|
||||
|
||||
if prisma_client is None:
|
||||
raise HTTPException(status_code=500, detail={"error": "No database connected"})
|
||||
|
||||
verbose_proxy_logger.debug("SCIM PATCH USER request: %s", patch_ops)
|
||||
|
||||
try:
|
||||
# Check if user exists
|
||||
existing_user = await prisma_client.db.litellm_usertable.find_unique(
|
||||
where={"user_id": user_id}
|
||||
)
|
||||
|
||||
if not existing_user:
|
||||
raise HTTPException(
|
||||
status_code=404, detail={"error": f"User not found with ID: {user_id}"}
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail={"error": f"Error patching user: {str(e)}"}
|
||||
)
|
||||
|
||||
|
||||
# Group Endpoints
|
||||
@scim_router.get(
|
||||
"/Groups",
|
||||
response_model=SCIMListResponse,
|
||||
status_code=200,
|
||||
dependencies=[Depends(user_api_key_auth), Depends(set_scim_content_type)],
|
||||
)
|
||||
async def get_groups(
|
||||
startIndex: int = Query(1, ge=1),
|
||||
count: int = Query(10, ge=1, le=100),
|
||||
filter: Optional[str] = Query(None),
|
||||
):
|
||||
"""
|
||||
Get a list of groups according to SCIM v2 protocol
|
||||
"""
|
||||
from litellm.proxy.proxy_server import prisma_client
|
||||
|
||||
if prisma_client is None:
|
||||
raise HTTPException(status_code=500, detail={"error": "No database connected"})
|
||||
|
||||
try:
|
||||
# Parse filter if provided (basic support)
|
||||
where_conditions = {}
|
||||
if filter:
|
||||
# Very basic filter support - only handling displayName eq
|
||||
if "displayName eq" in filter:
|
||||
team_alias = filter.split("displayName eq ")[1].strip("\"'")
|
||||
where_conditions["team_alias"] = team_alias
|
||||
|
||||
# Get teams from database
|
||||
teams = await prisma_client.db.litellm_teamtable.find_many(
|
||||
where=where_conditions,
|
||||
skip=(startIndex - 1),
|
||||
take=count,
|
||||
order={"created_at": "desc"},
|
||||
)
|
||||
|
||||
# Get total count for pagination
|
||||
total_count = await prisma_client.db.litellm_teamtable.count(
|
||||
where=where_conditions
|
||||
)
|
||||
|
||||
# Convert to SCIM format
|
||||
scim_groups = []
|
||||
for team in teams:
|
||||
# Get team members
|
||||
members = []
|
||||
for member_id in team.members or []:
|
||||
member = await prisma_client.db.litellm_usertable.find_unique(
|
||||
where={"user_id": member_id}
|
||||
)
|
||||
if member:
|
||||
display_name = member.user_email or member.user_id
|
||||
members.append(
|
||||
SCIMMember(value=member.user_id, display=display_name)
|
||||
)
|
||||
|
||||
team_alias = getattr(team, "team_alias", team.team_id)
|
||||
team_created_at = team.created_at.isoformat() if team.created_at else None
|
||||
team_updated_at = team.updated_at.isoformat() if team.updated_at else None
|
||||
|
||||
scim_group = SCIMGroup(
|
||||
schemas=["urn:ietf:params:scim:schemas:core:2.0:Group"],
|
||||
id=team.team_id,
|
||||
displayName=team_alias,
|
||||
members=members,
|
||||
meta={
|
||||
"resourceType": "Group",
|
||||
"created": team_created_at,
|
||||
"lastModified": team_updated_at,
|
||||
},
|
||||
)
|
||||
scim_groups.append(scim_group)
|
||||
|
||||
return SCIMListResponse(
|
||||
totalResults=total_count,
|
||||
startIndex=startIndex,
|
||||
itemsPerPage=min(count, len(scim_groups)),
|
||||
Resources=scim_groups,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail={"error": f"Error retrieving groups: {str(e)}"}
|
||||
)
|
||||
|
||||
|
||||
@scim_router.get(
|
||||
"/Groups/{group_id}",
|
||||
response_model=SCIMGroup,
|
||||
status_code=200,
|
||||
dependencies=[Depends(user_api_key_auth), Depends(set_scim_content_type)],
|
||||
)
|
||||
async def get_group(
|
||||
group_id: str = Path(..., title="Group ID"),
|
||||
):
|
||||
"""
|
||||
Get a single group by ID according to SCIM v2 protocol
|
||||
"""
|
||||
from litellm.proxy.proxy_server import prisma_client
|
||||
|
||||
if prisma_client is None:
|
||||
raise HTTPException(status_code=500, detail={"error": "No database connected"})
|
||||
|
||||
try:
|
||||
team = await prisma_client.db.litellm_teamtable.find_unique(
|
||||
where={"team_id": group_id}
|
||||
)
|
||||
|
||||
if not team:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail={"error": f"Group not found with ID: {group_id}"},
|
||||
)
|
||||
|
||||
scim_group = await ScimTransformations.transform_litellm_team_to_scim_group(
|
||||
team
|
||||
)
|
||||
return scim_group
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail={"error": f"Error retrieving group: {str(e)}"}
|
||||
)
|
||||
|
||||
|
||||
@scim_router.post(
|
||||
"/Groups",
|
||||
response_model=SCIMGroup,
|
||||
status_code=201,
|
||||
dependencies=[Depends(user_api_key_auth), Depends(set_scim_content_type)],
|
||||
)
|
||||
async def create_group(
|
||||
group: SCIMGroup = Body(...),
|
||||
):
|
||||
"""
|
||||
Create a group according to SCIM v2 protocol
|
||||
"""
|
||||
from litellm.proxy.proxy_server import prisma_client
|
||||
|
||||
if prisma_client is None:
|
||||
raise HTTPException(status_code=500, detail={"error": "No database connected"})
|
||||
|
||||
try:
|
||||
# Generate ID if not provided
|
||||
team_id = group.id or str(uuid.uuid4())
|
||||
|
||||
# Check if team already exists
|
||||
existing_team = await prisma_client.db.litellm_teamtable.find_unique(
|
||||
where={"team_id": team_id}
|
||||
)
|
||||
|
||||
if existing_team:
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail={"error": f"Group already exists with ID: {team_id}"},
|
||||
)
|
||||
|
||||
# Extract members
|
||||
members_with_roles: List[Member] = []
|
||||
if group.members:
|
||||
for member in group.members:
|
||||
# Check if user exists
|
||||
user = await prisma_client.db.litellm_usertable.find_unique(
|
||||
where={"user_id": member.value}
|
||||
)
|
||||
if user:
|
||||
members_with_roles.append(Member(user_id=member.value, role="user"))
|
||||
|
||||
# Create team in database
|
||||
created_team = await new_team(
|
||||
data=NewTeamRequest(
|
||||
team_id=team_id,
|
||||
team_alias=group.displayName,
|
||||
members_with_roles=members_with_roles,
|
||||
),
|
||||
http_request=Request(scope={"type": "http", "path": "/scim/v2/Groups"}),
|
||||
user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
|
||||
)
|
||||
|
||||
scim_group = await ScimTransformations.transform_litellm_team_to_scim_group(
|
||||
created_team
|
||||
)
|
||||
return scim_group
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail={"error": f"Error creating group: {str(e)}"}
|
||||
)
|
||||
|
||||
|
||||
@scim_router.put(
|
||||
"/Groups/{group_id}",
|
||||
response_model=SCIMGroup,
|
||||
status_code=200,
|
||||
dependencies=[Depends(user_api_key_auth), Depends(set_scim_content_type)],
|
||||
)
|
||||
async def update_group(
|
||||
group_id: str = Path(..., title="Group ID"),
|
||||
group: SCIMGroup = Body(...),
|
||||
):
|
||||
"""
|
||||
Update a group according to SCIM v2 protocol
|
||||
"""
|
||||
from litellm.proxy.proxy_server import prisma_client
|
||||
|
||||
if prisma_client is None:
|
||||
raise HTTPException(status_code=500, detail={"error": "No database connected"})
|
||||
|
||||
try:
|
||||
# Check if team exists
|
||||
existing_team = await prisma_client.db.litellm_teamtable.find_unique(
|
||||
where={"team_id": group_id}
|
||||
)
|
||||
|
||||
if not existing_team:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail={"error": f"Group not found with ID: {group_id}"},
|
||||
)
|
||||
|
||||
# Extract members
|
||||
member_ids = []
|
||||
if group.members:
|
||||
for member in group.members:
|
||||
# Check if user exists
|
||||
user = await prisma_client.db.litellm_usertable.find_unique(
|
||||
where={"user_id": member.value}
|
||||
)
|
||||
if user:
|
||||
member_ids.append(member.value)
|
||||
|
||||
# Update team in database
|
||||
existing_metadata = existing_team.metadata if existing_team.metadata else {}
|
||||
updated_team = await prisma_client.db.litellm_teamtable.update(
|
||||
where={"team_id": group_id},
|
||||
data={
|
||||
"team_alias": group.displayName,
|
||||
"members": member_ids,
|
||||
"metadata": {**existing_metadata, "scim_data": group.model_dump()},
|
||||
},
|
||||
)
|
||||
|
||||
# Handle user-team relationships
|
||||
current_members = existing_team.members or []
|
||||
|
||||
# Add new members to team
|
||||
for member_id in member_ids:
|
||||
if member_id not in current_members:
|
||||
user = await prisma_client.db.litellm_usertable.find_unique(
|
||||
where={"user_id": member_id}
|
||||
)
|
||||
if user:
|
||||
current_user_teams = user.teams or []
|
||||
if group_id not in current_user_teams:
|
||||
await prisma_client.db.litellm_usertable.update(
|
||||
where={"user_id": member_id},
|
||||
data={"teams": {"push": group_id}},
|
||||
)
|
||||
|
||||
# Remove former members from team
|
||||
for member_id in current_members:
|
||||
if member_id not in member_ids:
|
||||
user = await prisma_client.db.litellm_usertable.find_unique(
|
||||
where={"user_id": member_id}
|
||||
)
|
||||
if user:
|
||||
current_user_teams = user.teams or []
|
||||
if group_id in current_user_teams:
|
||||
new_teams = [t for t in current_user_teams if t != group_id]
|
||||
await prisma_client.db.litellm_usertable.update(
|
||||
where={"user_id": member_id}, data={"teams": new_teams}
|
||||
)
|
||||
|
||||
# Get updated members for response
|
||||
members = []
|
||||
for member_id in member_ids:
|
||||
user = await prisma_client.db.litellm_usertable.find_unique(
|
||||
where={"user_id": member_id}
|
||||
)
|
||||
if user:
|
||||
display_name = user.user_email or user.user_id
|
||||
members.append(SCIMMember(value=user.user_id, display=display_name))
|
||||
|
||||
team_created_at = (
|
||||
updated_team.created_at.isoformat() if updated_team.created_at else None
|
||||
)
|
||||
team_updated_at = (
|
||||
updated_team.updated_at.isoformat() if updated_team.updated_at else None
|
||||
)
|
||||
|
||||
return SCIMGroup(
|
||||
schemas=["urn:ietf:params:scim:schemas:core:2.0:Group"],
|
||||
id=group_id,
|
||||
displayName=updated_team.team_alias or group_id,
|
||||
members=members,
|
||||
meta={
|
||||
"resourceType": "Group",
|
||||
"created": team_created_at,
|
||||
"lastModified": team_updated_at,
|
||||
},
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail={"error": f"Error updating group: {str(e)}"}
|
||||
)
|
||||
|
||||
|
||||
@scim_router.delete(
|
||||
"/Groups/{group_id}",
|
||||
status_code=204,
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def delete_group(
|
||||
group_id: str = Path(..., title="Group ID"),
|
||||
):
|
||||
"""
|
||||
Delete a group according to SCIM v2 protocol
|
||||
"""
|
||||
from litellm.proxy.proxy_server import prisma_client
|
||||
|
||||
if prisma_client is None:
|
||||
raise HTTPException(status_code=500, detail={"error": "No database connected"})
|
||||
|
||||
try:
|
||||
# Check if team exists
|
||||
existing_team = await prisma_client.db.litellm_teamtable.find_unique(
|
||||
where={"team_id": group_id}
|
||||
)
|
||||
|
||||
if not existing_team:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail={"error": f"Group not found with ID: {group_id}"},
|
||||
)
|
||||
|
||||
# For each member, remove this team from their teams list
|
||||
for member_id in existing_team.members or []:
|
||||
user = await prisma_client.db.litellm_usertable.find_unique(
|
||||
where={"user_id": member_id}
|
||||
)
|
||||
if user:
|
||||
current_teams = user.teams or []
|
||||
if group_id in current_teams:
|
||||
new_teams = [t for t in current_teams if t != group_id]
|
||||
await prisma_client.db.litellm_usertable.update(
|
||||
where={"user_id": member_id}, data={"teams": new_teams}
|
||||
)
|
||||
|
||||
# Delete team
|
||||
await prisma_client.db.litellm_teamtable.delete(where={"team_id": group_id})
|
||||
|
||||
return Response(status_code=204)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail={"error": f"Error deleting group: {str(e)}"}
|
||||
)
|
||||
|
||||
|
||||
@scim_router.patch(
|
||||
"/Groups/{group_id}",
|
||||
response_model=SCIMGroup,
|
||||
status_code=200,
|
||||
dependencies=[Depends(user_api_key_auth), Depends(set_scim_content_type)],
|
||||
)
|
||||
async def patch_group(
|
||||
group_id: str = Path(..., title="Group ID"),
|
||||
patch_ops: SCIMPatchOp = Body(...),
|
||||
):
|
||||
"""
|
||||
Patch a group according to SCIM v2 protocol
|
||||
"""
|
||||
from litellm.proxy.proxy_server import prisma_client
|
||||
|
||||
if prisma_client is None:
|
||||
raise HTTPException(status_code=500, detail={"error": "No database connected"})
|
||||
|
||||
verbose_proxy_logger.debug("SCIM PATCH GROUP request: %s", patch_ops)
|
||||
|
||||
try:
|
||||
# Check if group exists
|
||||
existing_team = await prisma_client.db.litellm_teamtable.find_unique(
|
||||
where={"team_id": group_id}
|
||||
)
|
||||
|
||||
if not existing_team:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail={"error": f"Group not found with ID: {group_id}"},
|
||||
)
|
||||
return None
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail={"error": f"Error patching group: {str(e)}"}
|
||||
)
|
|
@@ -12,7 +12,7 @@ All /tag management endpoints

import datetime
import json
from typing import Dict, Optional
from typing import Dict, List, Optional

from fastapi import APIRouter, Depends, HTTPException

@@ -25,6 +25,7 @@ from litellm.proxy.management_endpoints.common_daily_activity import (
    get_daily_activity,
)
from litellm.types.tag_management import (
    LiteLLM_DailyTagSpendTable,
    TagConfig,
    TagDeleteRequest,
    TagInfoRequest,

@@ -301,6 +302,7 @@ async def info_tag(
    "/tag/list",
    tags=["tag management"],
    dependencies=[Depends(user_api_key_auth)],
    response_model=List[TagConfig],
)
async def list_tags(
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),

@@ -314,9 +316,33 @@ async def list_tags(
        raise HTTPException(status_code=500, detail="Database not connected")

    try:
        ## QUERY STORED TAGS ##
        tags_config = await _get_tags_config(prisma_client)
        list_of_tags = list(tags_config.values())
        return list_of_tags

        ## QUERY DYNAMIC TAGS ##
        dynamic_tags = await prisma_client.db.litellm_dailytagspend.find_many(
            distinct=["tag"],
        )

        dynamic_tags_list = [
            LiteLLM_DailyTagSpendTable(**dynamic_tag.model_dump())
            for dynamic_tag in dynamic_tags
        ]

        dynamic_tag_config = [
            TagConfig(
                name=tag.tag,
                description="This is just a spend tag that was passed dynamically in a request. It does not control any LLM models.",
                models=None,
                created_at=tag.created_at.isoformat(),
                updated_at=tag.updated_at.isoformat(),
            )
            for tag in dynamic_tags_list
            if tag.tag not in tags_config
        ]

        return list_of_tags + dynamic_tag_config
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@@ -364,6 +390,7 @@ async def delete_tag(
    "/tag/daily/activity",
    response_model=SpendAnalyticsPaginatedResponse,
    tags=["tag management"],
    dependencies=[Depends(user_api_key_auth)],
)
async def get_tag_daily_activity(
    tags: Optional[str] = None,

@@ -399,6 +426,7 @@ async def get_tag_daily_activity(
        table_name="litellm_dailytagspend",
        entity_id_field="tag",
        entity_id=tag_list,
        entity_metadata_field=None,
        start_date=start_date,
        end_date=end_date,
        model=model,
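The updated `/tag/list` handler above merges explicitly configured tags with tags that only appear in the daily spend table. A simplified, synchronous sketch of that merge, using plain dicts in place of `TagConfig` and the Prisma rows:

```python
from typing import Dict, List


def merge_tags(stored: Dict[str, dict], dynamic_tag_names: List[str]) -> List[dict]:
    """Combine stored tag configs with spend-only tags, mirroring /tag/list."""
    merged = list(stored.values())
    for name in dynamic_tag_names:
        if name not in stored:
            # Spend-only tags get a synthetic, read-only style entry
            merged.append(
                {
                    "name": name,
                    "description": "Spend tag passed dynamically in a request; does not control any LLM models.",
                    "models": None,
                }
            )
    return merged


stored = {"prod": {"name": "prod", "description": "Managed tag", "models": ["gpt-4o"]}}
print(merge_tags(stored, ["prod", "adhoc-experiment"]))
# -> the stored "prod" config plus a synthetic entry for "adhoc-experiment"
```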
@@ -56,11 +56,16 @@ from litellm.proxy._types import (
from litellm.proxy.auth.auth_checks import (
    allowed_route_check_inside_route,
    get_team_object,
    get_user_object,
)
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy.management_endpoints.common_utils import (
    _is_user_team_admin,
    _set_object_metadata_field,
    _user_has_admin_view,
)
from litellm.proxy.management_endpoints.tag_management_endpoints import (
    get_daily_activity,
)
from litellm.proxy.management_helpers.team_member_permission_checks import (
    TeamMemberPermissionChecks,

@@ -75,6 +80,9 @@ from litellm.proxy.utils import (
    handle_exception_on_proxy,
)
from litellm.router import Router
from litellm.types.proxy.management_endpoints.common_daily_activity import (
    SpendAnalyticsPaginatedResponse,
)
from litellm.types.proxy.management_endpoints.team_endpoints import (
    GetTeamMemberPermissionsResponse,
    UpdateTeamMemberPermissionsRequest,

@@ -515,12 +523,12 @@ async def update_team(
            updated_kv["model_id"] = _model_id

        updated_kv = prisma_client.jsonify_team_object(db_data=updated_kv)
        team_row: Optional[LiteLLM_TeamTable] = (
            await prisma_client.db.litellm_teamtable.update(
                where={"team_id": data.team_id},
                data=updated_kv,
                include={"litellm_model_table": True},  # type: ignore
            )
        team_row: Optional[
            LiteLLM_TeamTable
        ] = await prisma_client.db.litellm_teamtable.update(
            where={"team_id": data.team_id},
            data=updated_kv,
            include={"litellm_model_table": True},  # type: ignore
        )

        if team_row is None or team_row.team_id is None:

@@ -1146,10 +1154,10 @@ async def delete_team(
    team_rows: List[LiteLLM_TeamTable] = []
    for team_id in data.team_ids:
        try:
            team_row_base: Optional[BaseModel] = (
                await prisma_client.db.litellm_teamtable.find_unique(
                    where={"team_id": team_id}
                )
            team_row_base: Optional[
                BaseModel
            ] = await prisma_client.db.litellm_teamtable.find_unique(
                where={"team_id": team_id}
            )
            if team_row_base is None:
                raise Exception

@@ -1307,10 +1315,10 @@ async def team_info(
        )

    try:
        team_info: Optional[BaseModel] = (
            await prisma_client.db.litellm_teamtable.find_unique(
                where={"team_id": team_id}
            )
        team_info: Optional[
            BaseModel
        ] = await prisma_client.db.litellm_teamtable.find_unique(
            where={"team_id": team_id}
        )
        if team_info is None:
            raise Exception

@@ -2079,3 +2087,113 @@ async def update_team_member_permissions(
    )

    return updated_team


@router.get(
    "/team/daily/activity",
    response_model=SpendAnalyticsPaginatedResponse,
    tags=["team management"],
)
async def get_team_daily_activity(
    team_ids: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    model: Optional[str] = None,
    api_key: Optional[str] = None,
    page: int = 1,
    page_size: int = 10,
    exclude_team_ids: Optional[str] = None,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Get daily activity for specific teams or all teams.

    Args:
        team_ids (Optional[str]): Comma-separated list of team IDs to filter by. If not provided, returns data for all teams.
        start_date (Optional[str]): Start date for the activity period (YYYY-MM-DD).
        end_date (Optional[str]): End date for the activity period (YYYY-MM-DD).
        model (Optional[str]): Filter by model name.
        api_key (Optional[str]): Filter by API key.
        page (int): Page number for pagination.
        page_size (int): Number of items per page.
        exclude_team_ids (Optional[str]): Comma-separated list of team IDs to exclude.
    Returns:
        SpendAnalyticsPaginatedResponse: Paginated response containing daily activity data.
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        proxy_logging_obj,
        user_api_key_cache,
    )

    if prisma_client is None:
        raise HTTPException(
            status_code=500,
            detail={"error": CommonProxyErrors.db_not_connected_error.value},
        )

    # Convert comma-separated tags string to list if provided
    team_ids_list = team_ids.split(",") if team_ids else None
    exclude_team_ids_list: Optional[List[str]] = None

    if exclude_team_ids:
        exclude_team_ids_list = (
            exclude_team_ids.split(",") if exclude_team_ids else None
        )

    if not _user_has_admin_view(user_api_key_dict):
        user_info = await get_user_object(
            user_id=user_api_key_dict.user_id,
            prisma_client=prisma_client,
            user_id_upsert=False,
            user_api_key_cache=user_api_key_cache,
            parent_otel_span=user_api_key_dict.parent_otel_span,
            proxy_logging_obj=proxy_logging_obj,
        )
        if user_info is None:
            raise HTTPException(
                status_code=404,
                detail={
                    "error": "User= {} not found".format(user_api_key_dict.user_id)
                },
            )
        if team_ids_list is None:
            team_ids_list = user_info.teams
        else:
            # check if all team_ids are in user_info.teams
            for team_id in team_ids_list:
                if team_id not in user_info.teams:
                    raise HTTPException(
                        status_code=404,
                        detail={
                            "error": "User does not belong to Team= {}. Call `/user/info` to see user's teams".format(
                                team_id
                            )
                        },
                    )

    ## Fetch team aliases
    where_condition = {}
    if team_ids_list:
        where_condition["team_id"] = {"in": list(team_ids_list)}
    team_aliases = await prisma_client.db.litellm_teamtable.find_many(
        where=where_condition
    )
    team_alias_metadata = {
        t.team_id: {"team_alias": t.team_alias} for t in team_aliases
    }

    return await get_daily_activity(
        prisma_client=prisma_client,
        table_name="litellm_dailyteamspend",
        entity_id_field="team_id",
        entity_id=team_ids_list,
        entity_metadata_field=team_alias_metadata,
        exclude_entity_ids=exclude_team_ids_list,
        start_date=start_date,
        end_date=end_date,
        model=model,
        api_key=api_key,
        page=page,
        page_size=page_size,
    )
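The new `/team/daily/activity` endpoint parses a comma-separated `team_ids` parameter and, for non-admin callers, rejects any team the caller is not a member of. A standalone sketch of that filtering logic (the function name and `PermissionError` are illustrative, not the proxy's actual exception type):

```python
from typing import List, Optional


def resolve_team_ids(
    requested: Optional[str], user_team_ids: List[str], is_admin: bool
) -> List[str]:
    """Parse the comma-separated team_ids query param and enforce membership for non-admins."""
    team_ids = requested.split(",") if requested else []
    if is_admin:
        return team_ids  # admins may query any team (empty list = all teams)
    if not team_ids:
        return user_team_ids  # default to the caller's own teams
    for team_id in team_ids:
        if team_id not in user_team_ids:
            raise PermissionError(f"User does not belong to Team={team_id}")
    return team_ids


print(resolve_team_ids("team-1,team-2", ["team-1", "team-2", "team-3"], is_admin=False))
```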
@@ -578,26 +578,70 @@ async def azure_proxy_route(
    )


@router.api_route(
    "/vertex-ai/{endpoint:path}",
    methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
    tags=["Vertex AI Pass-through", "pass-through"],
    include_in_schema=False,
)
@router.api_route(
    "/vertex_ai/{endpoint:path}",
    methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
    tags=["Vertex AI Pass-through", "pass-through"],
)
async def vertex_proxy_route(
from abc import ABC, abstractmethod


class BaseVertexAIPassThroughHandler(ABC):
    @staticmethod
    @abstractmethod
    def get_default_base_target_url(vertex_location: Optional[str]) -> str:
        pass

    @staticmethod
    @abstractmethod
    def update_base_target_url_with_credential_location(
        base_target_url: str, vertex_location: Optional[str]
    ) -> str:
        pass


class VertexAIDiscoveryPassThroughHandler(BaseVertexAIPassThroughHandler):
    @staticmethod
    def get_default_base_target_url(vertex_location: Optional[str]) -> str:
        return "https://discoveryengine.googleapis.com/"

    @staticmethod
    def update_base_target_url_with_credential_location(
        base_target_url: str, vertex_location: Optional[str]
    ) -> str:
        return base_target_url


class VertexAIPassThroughHandler(BaseVertexAIPassThroughHandler):
    @staticmethod
    def get_default_base_target_url(vertex_location: Optional[str]) -> str:
        return f"https://{vertex_location}-aiplatform.googleapis.com/"

    @staticmethod
    def update_base_target_url_with_credential_location(
        base_target_url: str, vertex_location: Optional[str]
    ) -> str:
        return f"https://{vertex_location}-aiplatform.googleapis.com/"


def get_vertex_pass_through_handler(
    call_type: Literal["discovery", "aiplatform"]
) -> BaseVertexAIPassThroughHandler:
    if call_type == "discovery":
        return VertexAIDiscoveryPassThroughHandler()
    elif call_type == "aiplatform":
        return VertexAIPassThroughHandler()
    else:
        raise ValueError(f"Invalid call type: {call_type}")


async def _base_vertex_proxy_route(
    endpoint: str,
    request: Request,
    fastapi_response: Response,
    get_vertex_pass_through_handler: BaseVertexAIPassThroughHandler,
    user_api_key_dict: Optional[UserAPIKeyAuth] = None,
):
    """
    Call LiteLLM proxy via Vertex AI SDK.
    Base function for Vertex AI passthrough routes.
    Handles common logic for all Vertex AI services.

    [Docs](https://docs.litellm.ai/docs/pass_through/vertex_ai)
    Default base_target_url is `https://{vertex_location}-aiplatform.googleapis.com/`
    """
    from litellm.llms.vertex_ai.common_utils import (
        construct_target_url,

@@ -613,6 +657,14 @@ async def vertex_proxy_route(
        request=request,
        api_key=api_key_to_use,
    )

    if user_api_key_dict is None:
        api_key_to_use = get_litellm_virtual_key(request=request)
        user_api_key_dict = await user_api_key_auth(
            request=request,
            api_key=api_key_to_use,
        )

    vertex_project: Optional[str] = get_vertex_project_id_from_url(endpoint)
    vertex_location: Optional[str] = get_vertex_location_from_url(endpoint)
    vertex_credentials = passthrough_endpoint_router.get_vertex_credentials(

@@ -620,6 +672,10 @@ async def vertex_proxy_route(
        location=vertex_location,
    )

    base_target_url = get_vertex_pass_through_handler.get_default_base_target_url(
        vertex_location
    )

    headers_passed_through = False
    # Use headers from the incoming request if no vertex credentials are found
    if vertex_credentials is None or vertex_credentials.vertex_project is None:

@@ -628,7 +684,6 @@ async def vertex_proxy_route(
        verbose_proxy_logger.debug(
            "default_vertex_config not set, incoming request headers %s", headers
        )
        base_target_url = f"https://{vertex_location}-aiplatform.googleapis.com/"
        headers.pop("content-length", None)
        headers.pop("host", None)
    else:

@@ -636,9 +691,6 @@ async def vertex_proxy_route(
        vertex_location = vertex_credentials.vertex_location
        vertex_credentials_str = vertex_credentials.vertex_credentials

        # Construct base URL for the target endpoint
        base_target_url = f"https://{vertex_location}-aiplatform.googleapis.com/"

        _auth_header, vertex_project = await vertex_llm_base._ensure_access_token_async(
            credentials=vertex_credentials_str,
            project_id=vertex_project,

@@ -661,6 +713,13 @@ async def vertex_proxy_route(
        "Authorization": f"Bearer {auth_header}",
    }

    base_target_url = get_vertex_pass_through_handler.update_base_target_url_with_credential_location(
        base_target_url, vertex_location
    )

    if base_target_url is None:
        base_target_url = f"https://{vertex_location}-aiplatform.googleapis.com/"

    request_route = encoded_endpoint
    verbose_proxy_logger.debug("request_route %s", request_route)

@@ -707,6 +766,66 @@ async def vertex_proxy_route(
    return received_value


@router.api_route(
    "/vertex_ai/discovery/{endpoint:path}",
    methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
    tags=["Vertex AI Pass-through", "pass-through"],
)
async def vertex_discovery_proxy_route(
    endpoint: str,
    request: Request,
    fastapi_response: Response,
):
    """
    Call any vertex discovery endpoint using the proxy.

    Just use `{PROXY_BASE_URL}/vertex_ai/discovery/{endpoint:path}`

    Target url: `https://discoveryengine.googleapis.com`
    """

    discovery_handler = get_vertex_pass_through_handler(call_type="discovery")
    return await _base_vertex_proxy_route(
        endpoint=endpoint,
        request=request,
        fastapi_response=fastapi_response,
        get_vertex_pass_through_handler=discovery_handler,
    )


@router.api_route(
    "/vertex-ai/{endpoint:path}",
    methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
    tags=["Vertex AI Pass-through", "pass-through"],
    include_in_schema=False,
)
@router.api_route(
    "/vertex_ai/{endpoint:path}",
    methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
    tags=["Vertex AI Pass-through", "pass-through"],
)
async def vertex_proxy_route(
    endpoint: str,
    request: Request,
    fastapi_response: Response,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Call LiteLLM proxy via Vertex AI SDK.

    [Docs](https://docs.litellm.ai/docs/pass_through/vertex_ai)
    """
    ai_platform_handler = get_vertex_pass_through_handler(call_type="aiplatform")

    return await _base_vertex_proxy_route(
        endpoint=endpoint,
        request=request,
        fastapi_response=fastapi_response,
        get_vertex_pass_through_handler=ai_platform_handler,
        user_api_key_dict=user_api_key_dict,
    )


@router.api_route(
    "/openai/{endpoint:path}",
    methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
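The handler classes above reduce to one decision: discovery traffic targets `https://discoveryengine.googleapis.com/`, everything else the regional `aiplatform` host. A compact sketch of that selection, with `us-central1` as an assumed example region:

```python
from typing import Optional


def default_base_target_url(call_type: str, vertex_location: Optional[str]) -> str:
    # Mirrors the two handler classes: discovery ignores the region,
    # aiplatform interpolates it into the regional endpoint.
    if call_type == "discovery":
        return "https://discoveryengine.googleapis.com/"
    if call_type == "aiplatform":
        return f"https://{vertex_location}-aiplatform.googleapis.com/"
    raise ValueError(f"Invalid call type: {call_type}")


print(default_base_target_url("discovery", None))
print(default_base_target_url("aiplatform", "us-central1"))  # assumed region
```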
@@ -1,18 +1,13 @@
model_list:
  - model_name: anhropic-auto-inject-cache-user-message
  - model_name: openai/*
    litellm_params:
      model: anhropic/claude-3-5-sonnet-20240620
      api_key: os.environ/ANTHROPIC_API_KEY
      cache_control_injection_points:
        - location: message
          role: user

  - model_name: anhropic-auto-inject-cache-system-message
      model: openai/*
  - model_name: anthropic/*
    litellm_params:
      model: anhropic/claude-3-5-sonnet-20240620
      api_key: os.environ/ANTHROPIC_API_KEY
      cache_control_injection_points:
        - location: message
          role: user


      model: anthropic/*
  - model_name: gemini/*
    litellm_params:
      model: gemini/*
litellm_settings:
  drop_params: true
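With the wildcard entries above, any model name matching `openai/*`, `anthropic/*`, or `gemini/*` is routed by the proxy. A hedged usage sketch with the OpenAI SDK pointed at an assumed local proxy and a placeholder virtual key:

```python
from openai import OpenAI

# Assumed local proxy URL and virtual key for illustration
client = OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

# Any model matching a wildcard entry (openai/*, anthropic/*, gemini/*) is accepted
response = client.chat.completions.create(
    model="gemini/gemini-1.5-flash",
    messages=[{"role": "user", "content": "Hello from the wildcard config"}],
)
print(response.choices[0].message.content)
```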
@@ -238,6 +238,7 @@ from litellm.proxy.management_endpoints.model_management_endpoints import (
from litellm.proxy.management_endpoints.organization_endpoints import (
    router as organization_router,
)
from litellm.proxy.management_endpoints.scim.scim_v2 import scim_router
from litellm.proxy.management_endpoints.tag_management_endpoints import (
    router as tag_management_router,
)

@@ -803,9 +804,9 @@ model_max_budget_limiter = _PROXY_VirtualKeyModelMaxBudgetLimiter(
    dual_cache=user_api_key_cache
)
litellm.logging_callback_manager.add_litellm_callback(model_max_budget_limiter)
redis_usage_cache: Optional[
    RedisCache
] = None  # redis cache used for tracking spend, tpm/rpm limits
redis_usage_cache: Optional[RedisCache] = (
    None  # redis cache used for tracking spend, tpm/rpm limits
)
user_custom_auth = None
user_custom_key_generate = None
user_custom_sso = None

@@ -1131,9 +1132,9 @@ async def update_cache(  # noqa: PLR0915
    _id = "team_id:{}".format(team_id)
    try:
        # Fetch the existing cost for the given user
        existing_spend_obj: Optional[
            LiteLLM_TeamTable
        ] = await user_api_key_cache.async_get_cache(key=_id)
        existing_spend_obj: Optional[LiteLLM_TeamTable] = (
            await user_api_key_cache.async_get_cache(key=_id)
        )
        if existing_spend_obj is None:
            # do nothing if team not in api key cache
            return

@@ -1807,13 +1808,6 @@ class ProxyConfig:
        if master_key and master_key.startswith("os.environ/"):
            master_key = get_secret(master_key)  # type: ignore

        if not isinstance(master_key, str):
            raise Exception(
                "Master key must be a string. Current type - {}".format(
                    type(master_key)
                )
            )

        if master_key is not None and isinstance(master_key, str):
            litellm_master_key_hash = hash_token(master_key)
        ### USER API KEY CACHE IN-MEMORY TTL ###

@@ -2812,9 +2806,9 @@ async def initialize(  # noqa: PLR0915
        user_api_base = api_base
        dynamic_config[user_model]["api_base"] = api_base
    if api_version:
        os.environ[
            "AZURE_API_VERSION"
        ] = api_version  # set this for azure - litellm can read this from the env
        os.environ["AZURE_API_VERSION"] = (
            api_version  # set this for azure - litellm can read this from the env
        )
    if max_tokens:  # model-specific param
        dynamic_config[user_model]["max_tokens"] = max_tokens
    if temperature:  # model-specific param

@@ -7756,9 +7750,9 @@ async def get_config_list(
        hasattr(sub_field_info, "description")
        and sub_field_info.description is not None
    ):
        nested_fields[
            idx
        ].field_description = sub_field_info.description
        nested_fields[idx].field_description = (
            sub_field_info.description
        )
    idx += 1

    _stored_in_db = None

@@ -8176,6 +8170,7 @@ app.include_router(key_management_router)
app.include_router(internal_user_router)
app.include_router(team_router)
app.include_router(ui_sso_router)
app.include_router(scim_router)
app.include_router(organization_router)
app.include_router(customer_router)
app.include_router(spend_management_router)
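The config logic above resolves a master key written as `os.environ/<VAR>` and then stores only a hashed form via `hash_token`. A simplified sketch of that resolution - `sha256` stands in here for LiteLLM's actual `hash_token` helper:

```python
import hashlib
import os


def resolve_master_key(master_key: str) -> str:
    """Resolve an 'os.environ/<VAR>' reference to its value, mirroring the proxy config logic."""
    if master_key.startswith("os.environ/"):
        env_var = master_key.split("os.environ/", 1)[1]
        master_key = os.environ.get(env_var, "")
    return master_key


os.environ["LITELLM_MASTER_KEY"] = "sk-1234"  # illustrative value
resolved = resolve_master_key("os.environ/LITELLM_MASTER_KEY")
# The proxy keeps a hash of the key rather than the raw value
print(hashlib.sha256(resolved.encode()).hexdigest()[:16])
```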
@@ -169,6 +169,7 @@ model LiteLLM_VerificationToken {
  budget_duration String?
  budget_reset_at DateTime?
  allowed_cache_controls String[] @default([])
  allowed_routes String[] @default([])
  model_spend Json @default("{}")
  model_max_budget Json @default("{}")
  budget_id String?
115
litellm/responses/litellm_completion_transformation/handler.py
Normal file
115
litellm/responses/litellm_completion_transformation/handler.py
Normal file
|
@ -0,0 +1,115 @@
|
|||
"""
|
||||
Handler for transforming responses api requests to litellm.completion requests
|
||||
"""
|
||||
|
||||
from typing import Any, Coroutine, Optional, Union
|
||||
|
||||
import litellm
|
||||
from litellm.responses.litellm_completion_transformation.streaming_iterator import (
|
||||
LiteLLMCompletionStreamingIterator,
|
||||
)
|
||||
from litellm.responses.litellm_completion_transformation.transformation import (
|
||||
LiteLLMCompletionResponsesConfig,
|
||||
)
|
||||
from litellm.responses.streaming_iterator import BaseResponsesAPIStreamingIterator
|
||||
from litellm.types.llms.openai import (
|
||||
ResponseInputParam,
|
||||
ResponsesAPIOptionalRequestParams,
|
||||
ResponsesAPIResponse,
|
||||
)
|
||||
from litellm.types.utils import ModelResponse
|
||||
|
||||
|
||||
class LiteLLMCompletionTransformationHandler:
|
||||
|
||||
def response_api_handler(
|
||||
self,
|
||||
model: str,
|
||||
input: Union[str, ResponseInputParam],
|
||||
responses_api_request: ResponsesAPIOptionalRequestParams,
|
||||
custom_llm_provider: Optional[str] = None,
|
||||
_is_async: bool = False,
|
||||
stream: Optional[bool] = None,
|
||||
**kwargs,
|
||||
) -> Union[
|
||||
ResponsesAPIResponse,
|
||||
BaseResponsesAPIStreamingIterator,
|
||||
Coroutine[
|
||||
Any, Any, Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]
|
||||
],
|
||||
]:
|
||||
litellm_completion_request: dict = (
|
||||
LiteLLMCompletionResponsesConfig.transform_responses_api_request_to_chat_completion_request(
|
||||
model=model,
|
||||
input=input,
|
||||
responses_api_request=responses_api_request,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
stream=stream,
|
||||
**kwargs,
|
||||
)
|
||||
)
|
||||
|
||||
if _is_async:
|
||||
return self.async_response_api_handler(
|
||||
litellm_completion_request=litellm_completion_request,
|
||||
request_input=input,
|
||||
responses_api_request=responses_api_request,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
litellm_completion_response: Union[
|
||||
ModelResponse, litellm.CustomStreamWrapper
|
||||
] = litellm.completion(
|
||||
**litellm_completion_request,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
if isinstance(litellm_completion_response, ModelResponse):
|
||||
responses_api_response: ResponsesAPIResponse = (
|
||||
LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
|
||||
chat_completion_response=litellm_completion_response,
|
||||
request_input=input,
|
||||
responses_api_request=responses_api_request,
|
||||
)
|
||||
)
|
||||
|
||||
return responses_api_response
|
||||
|
||||
elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
|
||||
return LiteLLMCompletionStreamingIterator(
|
||||
litellm_custom_stream_wrapper=litellm_completion_response,
|
||||
request_input=input,
|
||||
responses_api_request=responses_api_request,
|
||||
)
|
||||
|
||||
async def async_response_api_handler(
|
||||
self,
|
||||
litellm_completion_request: dict,
|
||||
request_input: Union[str, ResponseInputParam],
|
||||
responses_api_request: ResponsesAPIOptionalRequestParams,
|
||||
**kwargs,
|
||||
) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
|
||||
litellm_completion_response: Union[
|
||||
ModelResponse, litellm.CustomStreamWrapper
|
||||
] = await litellm.acompletion(
|
||||
**litellm_completion_request,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
if isinstance(litellm_completion_response, ModelResponse):
|
||||
responses_api_response: ResponsesAPIResponse = (
|
||||
LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
|
||||
chat_completion_response=litellm_completion_response,
|
||||
request_input=request_input,
|
||||
responses_api_request=responses_api_request,
|
||||
)
|
||||
)
|
||||
|
||||
return responses_api_response
|
||||
|
||||
elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
|
||||
return LiteLLMCompletionStreamingIterator(
|
||||
litellm_custom_stream_wrapper=litellm_completion_response,
|
||||
request_input=request_input,
|
||||
responses_api_request=responses_api_request,
|
||||
)
|
|
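For orientation, a minimal, hedged sketch of how this fallback handler gets exercised: when `litellm.responses()` finds no native Responses API config for a provider (wired up later in this diff), the request is bridged through `litellm.completion()` by `LiteLLMCompletionTransformationHandler`. The model name and key below are placeholders, not part of this change.

```python
import os
import litellm

# Placeholder provider/model: assumed to have no native Responses API config,
# so litellm.responses() falls back to the completion-bridge handler above.
os.environ["ANTHROPIC_API_KEY"] = "sk-..."  # placeholder

response = litellm.responses(
    model="anthropic/claude-3-5-sonnet-20240620",  # placeholder model name
    input="Tell me a one sentence bedtime story about a unicorn.",
    max_output_tokens=100,
)
print(response.output)
```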
@ -0,0 +1,59 @@
|
|||
"""
|
||||
Responses API has previous_response_id, which is the id of the previous response.
|
||||
|
||||
LiteLLM needs to maintain a cache of the previous response input, output, previous_response_id, and model.
|
||||
|
||||
This class handles that cache.
|
||||
"""
|
||||
|
||||
from typing import List, Optional, Tuple, Union
|
||||
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
from litellm.caching import InMemoryCache
|
||||
from litellm.types.llms.openai import ResponseInputParam, ResponsesAPIResponse
|
||||
|
||||
RESPONSES_API_PREVIOUS_RESPONSES_CACHE = InMemoryCache()
|
||||
MAX_PREV_SESSION_INPUTS = 50
|
||||
|
||||
|
||||
class ResponsesAPISessionElement(TypedDict, total=False):
|
||||
input: Union[str, ResponseInputParam]
|
||||
output: ResponsesAPIResponse
|
||||
response_id: str
|
||||
previous_response_id: Optional[str]
|
||||
|
||||
|
||||
class SessionHandler:
|
||||
|
||||
def add_completed_response_to_cache(
|
||||
self, response_id: str, session_element: ResponsesAPISessionElement
|
||||
):
|
||||
RESPONSES_API_PREVIOUS_RESPONSES_CACHE.set_cache(
|
||||
key=response_id, value=session_element
|
||||
)
|
||||
|
||||
def get_chain_of_previous_input_output_pairs(
|
||||
self, previous_response_id: str
|
||||
) -> List[Tuple[ResponseInputParam, ResponsesAPIResponse]]:
|
||||
response_api_inputs: List[Tuple[ResponseInputParam, ResponsesAPIResponse]] = []
|
||||
current_previous_response_id = previous_response_id
|
||||
|
||||
count_session_elements = 0
|
||||
while current_previous_response_id:
|
||||
if count_session_elements > MAX_PREV_SESSION_INPUTS:
|
||||
break
|
||||
session_element = RESPONSES_API_PREVIOUS_RESPONSES_CACHE.get_cache(
|
||||
key=current_previous_response_id
|
||||
)
|
||||
if session_element:
|
||||
response_api_inputs.append(
|
||||
(session_element.get("input"), session_element.get("output"))
|
||||
)
|
||||
current_previous_response_id = session_element.get(
|
||||
"previous_response_id"
|
||||
)
|
||||
else:
|
||||
break
|
||||
count_session_elements += 1
|
||||
return response_api_inputs
|
|
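A hedged sketch of how this cache is meant to be used; `first_response` is a placeholder for a `ResponsesAPIResponse` returned by an earlier call.

```python
# Sketch only: names come from the module above; first_response is a placeholder.
session_handler = SessionHandler()

session_handler.add_completed_response_to_cache(
    response_id="resp_1",
    session_element=ResponsesAPISessionElement(
        input="What is 2 + 2?",
        output=first_response,          # ResponsesAPIResponse from an earlier request
        response_id="resp_1",
        previous_response_id=None,
    ),
)

# Walks the previous_response_id chain starting from the given id, capped at
# MAX_PREV_SESSION_INPUTS hops, and returns the recovered (input, output) pairs.
pairs = session_handler.get_chain_of_previous_input_output_pairs(
    previous_response_id="resp_1"
)
```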
@ -0,0 +1,110 @@
|
|||
from typing import List, Optional, Union
|
||||
|
||||
import litellm
|
||||
from litellm.main import stream_chunk_builder
|
||||
from litellm.responses.litellm_completion_transformation.transformation import (
|
||||
LiteLLMCompletionResponsesConfig,
|
||||
)
|
||||
from litellm.responses.streaming_iterator import ResponsesAPIStreamingIterator
|
||||
from litellm.types.llms.openai import (
|
||||
ResponseCompletedEvent,
|
||||
ResponseInputParam,
|
||||
ResponsesAPIOptionalRequestParams,
|
||||
ResponsesAPIStreamEvents,
|
||||
ResponsesAPIStreamingResponse,
|
||||
)
|
||||
from litellm.types.utils import (
|
||||
ModelResponse,
|
||||
ModelResponseStream,
|
||||
TextCompletionResponse,
|
||||
)
|
||||
|
||||
|
||||
class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
|
||||
"""
|
||||
Iterator for processing streaming responses from the Responses API (supports both sync and async iteration).
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
litellm_custom_stream_wrapper: litellm.CustomStreamWrapper,
|
||||
request_input: Union[str, ResponseInputParam],
|
||||
responses_api_request: ResponsesAPIOptionalRequestParams,
|
||||
):
|
||||
self.litellm_custom_stream_wrapper: litellm.CustomStreamWrapper = (
|
||||
litellm_custom_stream_wrapper
|
||||
)
|
||||
self.request_input: Union[str, ResponseInputParam] = request_input
|
||||
self.responses_api_request: ResponsesAPIOptionalRequestParams = (
|
||||
responses_api_request
|
||||
)
|
||||
self.collected_chunks: List[ModelResponseStream] = []
|
||||
self.finished: bool = False
|
||||
|
||||
async def __anext__(
|
||||
self,
|
||||
) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]:
|
||||
try:
|
||||
while True:
|
||||
if self.finished is True:
|
||||
raise StopAsyncIteration
|
||||
# Get the next chunk from the stream
|
||||
try:
|
||||
chunk = await self.litellm_custom_stream_wrapper.__anext__()
|
||||
self.collected_chunks.append(chunk)
|
||||
except StopAsyncIteration:
|
||||
self.finished = True
|
||||
response_completed_event = self._emit_response_completed_event()
|
||||
if response_completed_event:
|
||||
return response_completed_event
|
||||
else:
|
||||
raise StopAsyncIteration
|
||||
|
||||
except Exception as e:
|
||||
# Handle HTTP errors
|
||||
self.finished = True
|
||||
raise e
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(
|
||||
self,
|
||||
) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]:
|
||||
try:
|
||||
while True:
|
||||
if self.finished is True:
|
||||
raise StopAsyncIteration
|
||||
# Get the next chunk from the stream
|
||||
try:
|
||||
chunk = self.litellm_custom_stream_wrapper.__next__()
|
||||
self.collected_chunks.append(chunk)
|
||||
except StopAsyncIteration:
|
||||
self.finished = True
|
||||
response_completed_event = self._emit_response_completed_event()
|
||||
if response_completed_event:
|
||||
return response_completed_event
|
||||
else:
|
||||
raise StopAsyncIteration
|
||||
|
||||
except Exception as e:
|
||||
# Handle HTTP errors
|
||||
self.finished = True
|
||||
raise e
|
||||
|
||||
def _emit_response_completed_event(self) -> Optional[ResponseCompletedEvent]:
|
||||
litellm_model_response: Optional[
|
||||
Union[ModelResponse, TextCompletionResponse]
|
||||
] = stream_chunk_builder(chunks=self.collected_chunks)
|
||||
if litellm_model_response and isinstance(litellm_model_response, ModelResponse):
|
||||
|
||||
return ResponseCompletedEvent(
|
||||
type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
|
||||
response=LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
|
||||
request_input=self.request_input,
|
||||
chat_completion_response=litellm_model_response,
|
||||
responses_api_request=self.responses_api_request,
|
||||
),
|
||||
)
|
||||
else:
|
||||
return None
|
|
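A hedged sketch of the intended consumption pattern, assuming the base `ResponsesAPIStreamingIterator` supplies `__aiter__`; the `stream` object is a placeholder `CustomStreamWrapper` from a prior `litellm.acompletion(..., stream=True)` call.

```python
# Sketch only: chunks are buffered internally; this iterator emits a single
# ResponseCompletedEvent once the underlying chat-completion stream finishes,
# built by folding the collected chunks with stream_chunk_builder.
iterator = LiteLLMCompletionStreamingIterator(
    litellm_custom_stream_wrapper=stream,   # placeholder CustomStreamWrapper
    request_input="Hello!",
    responses_api_request={},
)

async for event in iterator:
    print(event.type)   # ResponsesAPIStreamEvents.RESPONSE_COMPLETED
```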
@ -0,0 +1,631 @@
|
|||
"""
|
||||
Handles transforming from Responses API -> LiteLLM completion (Chat Completion API)
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
from openai.types.responses.tool_param import FunctionToolParam
|
||||
|
||||
from litellm.caching import InMemoryCache
|
||||
from litellm.responses.litellm_completion_transformation.session_handler import (
|
||||
ResponsesAPISessionElement,
|
||||
SessionHandler,
|
||||
)
|
||||
from litellm.types.llms.openai import (
|
||||
AllMessageValues,
|
||||
ChatCompletionResponseMessage,
|
||||
ChatCompletionSystemMessage,
|
||||
ChatCompletionToolCallChunk,
|
||||
ChatCompletionToolCallFunctionChunk,
|
||||
ChatCompletionToolMessage,
|
||||
ChatCompletionToolParam,
|
||||
ChatCompletionToolParamFunctionChunk,
|
||||
ChatCompletionUserMessage,
|
||||
GenericChatCompletionMessage,
|
||||
Reasoning,
|
||||
ResponseAPIUsage,
|
||||
ResponseInputParam,
|
||||
ResponsesAPIOptionalRequestParams,
|
||||
ResponsesAPIResponse,
|
||||
ResponseTextConfig,
|
||||
)
|
||||
from litellm.types.responses.main import (
|
||||
GenericResponseOutputItem,
|
||||
GenericResponseOutputItemContentAnnotation,
|
||||
OutputFunctionToolCall,
|
||||
OutputText,
|
||||
)
|
||||
from litellm.types.utils import (
|
||||
ChatCompletionAnnotation,
|
||||
ChatCompletionMessageToolCall,
|
||||
Choices,
|
||||
Function,
|
||||
Message,
|
||||
ModelResponse,
|
||||
Usage,
|
||||
)
|
||||
|
||||
########### Initialize Classes used for Responses API ###########
|
||||
TOOL_CALLS_CACHE = InMemoryCache()
|
||||
RESPONSES_API_SESSION_HANDLER = SessionHandler()
|
||||
########### End of Initialize Classes used for Responses API ###########
|
||||
|
||||
|
||||
class LiteLLMCompletionResponsesConfig:
|
||||
|
||||
@staticmethod
|
||||
def transform_responses_api_request_to_chat_completion_request(
|
||||
model: str,
|
||||
input: Union[str, ResponseInputParam],
|
||||
responses_api_request: ResponsesAPIOptionalRequestParams,
|
||||
custom_llm_provider: Optional[str] = None,
|
||||
stream: Optional[bool] = None,
|
||||
**kwargs,
|
||||
) -> dict:
|
||||
"""
|
||||
Transform a Responses API request into a Chat Completion request
|
||||
"""
|
||||
litellm_completion_request: dict = {
|
||||
"messages": LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
|
||||
input=input,
|
||||
responses_api_request=responses_api_request,
|
||||
previous_response_id=responses_api_request.get("previous_response_id"),
|
||||
),
|
||||
"model": model,
|
||||
"tool_choice": responses_api_request.get("tool_choice"),
|
||||
"tools": LiteLLMCompletionResponsesConfig.transform_responses_api_tools_to_chat_completion_tools(
|
||||
responses_api_request.get("tools") or [] # type: ignore
|
||||
),
|
||||
"top_p": responses_api_request.get("top_p"),
|
||||
"user": responses_api_request.get("user"),
|
||||
"temperature": responses_api_request.get("temperature"),
|
||||
"parallel_tool_calls": responses_api_request.get("parallel_tool_calls"),
|
||||
"max_tokens": responses_api_request.get("max_output_tokens"),
|
||||
"stream": stream,
|
||||
"metadata": kwargs.get("metadata"),
|
||||
"service_tier": kwargs.get("service_tier"),
|
||||
# litellm specific params
|
||||
"custom_llm_provider": custom_llm_provider,
|
||||
}
|
||||
|
||||
# only pass non-None values
|
||||
litellm_completion_request = {
|
||||
k: v for k, v in litellm_completion_request.items() if v is not None
|
||||
}
|
||||
|
||||
return litellm_completion_request
|
||||
|
||||
@staticmethod
|
||||
def transform_responses_api_input_to_messages(
|
||||
input: Union[str, ResponseInputParam],
|
||||
responses_api_request: ResponsesAPIOptionalRequestParams,
|
||||
previous_response_id: Optional[str] = None,
|
||||
) -> List[
|
||||
Union[
|
||||
AllMessageValues,
|
||||
GenericChatCompletionMessage,
|
||||
ChatCompletionMessageToolCall,
|
||||
ChatCompletionResponseMessage,
|
||||
]
|
||||
]:
|
||||
"""
|
||||
Transform a Responses API input into a list of messages
|
||||
"""
|
||||
messages: List[
|
||||
Union[
|
||||
AllMessageValues,
|
||||
GenericChatCompletionMessage,
|
||||
ChatCompletionMessageToolCall,
|
||||
ChatCompletionResponseMessage,
|
||||
]
|
||||
] = []
|
||||
if responses_api_request.get("instructions"):
|
||||
messages.append(
|
||||
LiteLLMCompletionResponsesConfig.transform_instructions_to_system_message(
|
||||
responses_api_request.get("instructions")
|
||||
)
|
||||
)
|
||||
|
||||
if previous_response_id:
|
||||
previous_response_pairs = (
|
||||
RESPONSES_API_SESSION_HANDLER.get_chain_of_previous_input_output_pairs(
|
||||
previous_response_id=previous_response_id
|
||||
)
|
||||
)
|
||||
if previous_response_pairs:
|
||||
for previous_response_pair in previous_response_pairs:
|
||||
chat_completion_input_messages = LiteLLMCompletionResponsesConfig._transform_response_input_param_to_chat_completion_message(
|
||||
input=previous_response_pair[0],
|
||||
)
|
||||
chat_completion_output_messages = LiteLLMCompletionResponsesConfig._transform_responses_api_outputs_to_chat_completion_messages(
|
||||
responses_api_output=previous_response_pair[1],
|
||||
)
|
||||
|
||||
messages.extend(chat_completion_input_messages)
|
||||
messages.extend(chat_completion_output_messages)
|
||||
|
||||
messages.extend(
|
||||
LiteLLMCompletionResponsesConfig._transform_response_input_param_to_chat_completion_message(
|
||||
input=input,
|
||||
)
|
||||
)
|
||||
|
||||
return messages
|
||||
|
||||
@staticmethod
|
||||
def _transform_response_input_param_to_chat_completion_message(
|
||||
input: Union[str, ResponseInputParam],
|
||||
) -> List[
|
||||
Union[
|
||||
AllMessageValues,
|
||||
GenericChatCompletionMessage,
|
||||
ChatCompletionMessageToolCall,
|
||||
ChatCompletionResponseMessage,
|
||||
]
|
||||
]:
|
||||
"""
|
||||
Transform a ResponseInputParam into a list of Chat Completion messages
|
||||
"""
|
||||
messages: List[
|
||||
Union[
|
||||
AllMessageValues,
|
||||
GenericChatCompletionMessage,
|
||||
ChatCompletionMessageToolCall,
|
||||
ChatCompletionResponseMessage,
|
||||
]
|
||||
] = []
|
||||
tool_call_output_messages: List[
|
||||
Union[
|
||||
AllMessageValues,
|
||||
GenericChatCompletionMessage,
|
||||
ChatCompletionMessageToolCall,
|
||||
ChatCompletionResponseMessage,
|
||||
]
|
||||
] = []
|
||||
|
||||
if isinstance(input, str):
|
||||
messages.append(ChatCompletionUserMessage(role="user", content=input))
|
||||
elif isinstance(input, list):
|
||||
for _input in input:
|
||||
chat_completion_messages = LiteLLMCompletionResponsesConfig._transform_responses_api_input_item_to_chat_completion_message(
|
||||
input_item=_input
|
||||
)
|
||||
if LiteLLMCompletionResponsesConfig._is_input_item_tool_call_output(
|
||||
input_item=_input
|
||||
):
|
||||
tool_call_output_messages.extend(chat_completion_messages)
|
||||
else:
|
||||
messages.extend(chat_completion_messages)
|
||||
|
||||
messages.extend(tool_call_output_messages)
|
||||
return messages
|
||||
|
||||
@staticmethod
|
||||
def _ensure_tool_call_output_has_corresponding_tool_call(
|
||||
messages: List[Union[AllMessageValues, GenericChatCompletionMessage]],
|
||||
) -> bool:
|
||||
"""
|
||||
If any tool call output is present, ensure there is a corresponding tool call/tool_use block
|
||||
"""
|
||||
for message in messages:
|
||||
if message.get("role") == "tool":
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _transform_responses_api_input_item_to_chat_completion_message(
|
||||
input_item: Any,
|
||||
) -> List[
|
||||
Union[
|
||||
AllMessageValues,
|
||||
GenericChatCompletionMessage,
|
||||
ChatCompletionResponseMessage,
|
||||
]
|
||||
]:
|
||||
"""
|
||||
Transform a Responses API input item into Chat Completion messages
|
||||
|
||||
- EasyInputMessageParam
|
||||
- Message
|
||||
- ResponseOutputMessageParam
|
||||
- ResponseFileSearchToolCallParam
|
||||
- ResponseComputerToolCallParam
|
||||
- ComputerCallOutput
|
||||
- ResponseFunctionWebSearchParam
|
||||
- ResponseFunctionToolCallParam
|
||||
- FunctionCallOutput
|
||||
- ResponseReasoningItemParam
|
||||
- ItemReference
|
||||
"""
|
||||
if LiteLLMCompletionResponsesConfig._is_input_item_tool_call_output(input_item):
|
||||
# handle executed tool call results
|
||||
return LiteLLMCompletionResponsesConfig._transform_responses_api_tool_call_output_to_chat_completion_message(
|
||||
tool_call_output=input_item
|
||||
)
|
||||
else:
|
||||
return [
|
||||
GenericChatCompletionMessage(
|
||||
role=input_item.get("role") or "user",
|
||||
content=LiteLLMCompletionResponsesConfig._transform_responses_api_content_to_chat_completion_content(
|
||||
input_item.get("content")
|
||||
),
|
||||
)
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _is_input_item_tool_call_output(input_item: Any) -> bool:
|
||||
"""
|
||||
Check if the input item is a tool call output
|
||||
"""
|
||||
return input_item.get("type") in [
|
||||
"function_call_output",
|
||||
"web_search_call",
|
||||
"computer_call_output",
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _transform_responses_api_tool_call_output_to_chat_completion_message(
|
||||
tool_call_output: Dict[str, Any],
|
||||
) -> List[
|
||||
Union[
|
||||
AllMessageValues,
|
||||
GenericChatCompletionMessage,
|
||||
ChatCompletionResponseMessage,
|
||||
]
|
||||
]:
|
||||
"""
|
||||
ChatCompletionToolMessage is used to indicate the output from a tool call
|
||||
"""
|
||||
tool_output_message = ChatCompletionToolMessage(
|
||||
role="tool",
|
||||
content=tool_call_output.get("output") or "",
|
||||
tool_call_id=tool_call_output.get("call_id") or "",
|
||||
)
|
||||
|
||||
_tool_use_definition = TOOL_CALLS_CACHE.get_cache(
|
||||
key=tool_call_output.get("call_id") or "",
|
||||
)
|
||||
if _tool_use_definition:
|
||||
"""
|
||||
Append the tool use definition to the list of messages
|
||||
|
||||
|
||||
Providers like Anthropic require the tool use definition to be included with the tool output
|
||||
|
||||
- Input:
|
||||
{'function':
|
||||
arguments:'{"command": ["echo","<html>\\n<head>\\n <title>Hello</title>\\n</head>\\n<body>\\n <h1>Hi</h1>\\n</body>\\n</html>",">","index.html"]}',
|
||||
name='shell',
|
||||
'id': 'toolu_018KFWsEySHjdKZPdUzXpymJ',
|
||||
'type': 'function'
|
||||
}
|
||||
- Output:
|
||||
{
|
||||
"id": "toolu_018KFWsEySHjdKZPdUzXpymJ",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"arguments": "{\"latitude\":48.8566,\"longitude\":2.3522}"
|
||||
}
|
||||
}
|
||||
|
||||
"""
|
||||
function: dict = _tool_use_definition.get("function") or {}
|
||||
tool_call_chunk = ChatCompletionToolCallChunk(
|
||||
id=_tool_use_definition.get("id") or "",
|
||||
type=_tool_use_definition.get("type") or "function",
|
||||
function=ChatCompletionToolCallFunctionChunk(
|
||||
name=function.get("name") or "",
|
||||
arguments=function.get("arguments") or "",
|
||||
),
|
||||
index=0,
|
||||
)
|
||||
chat_completion_response_message = ChatCompletionResponseMessage(
|
||||
tool_calls=[tool_call_chunk],
|
||||
role="assistant",
|
||||
)
|
||||
return [chat_completion_response_message, tool_output_message]
|
||||
|
||||
return [tool_output_message]
|
||||
|
||||
@staticmethod
|
||||
def _transform_responses_api_content_to_chat_completion_content(
|
||||
content: Any,
|
||||
) -> Union[str, List[Union[str, Dict[str, Any]]]]:
|
||||
"""
|
||||
Transform Responses API content into Chat Completion content
|
||||
"""
|
||||
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
elif isinstance(content, list):
|
||||
content_list: List[Union[str, Dict[str, Any]]] = []
|
||||
for item in content:
|
||||
if isinstance(item, str):
|
||||
content_list.append(item)
|
||||
elif isinstance(item, dict):
|
||||
content_list.append(
|
||||
{
|
||||
"type": LiteLLMCompletionResponsesConfig._get_chat_completion_request_content_type(
|
||||
item.get("type") or "text"
|
||||
),
|
||||
"text": item.get("text"),
|
||||
}
|
||||
)
|
||||
return content_list
|
||||
else:
|
||||
raise ValueError(f"Invalid content type: {type(content)}")
|
||||
|
||||
@staticmethod
|
||||
def _get_chat_completion_request_content_type(content_type: str) -> str:
|
||||
"""
|
||||
Get the Chat Completion request content type
|
||||
"""
|
||||
# Responses API content has `input_` prefix, if it exists, remove it
|
||||
if content_type.startswith("input_"):
|
||||
return content_type[len("input_") :]
|
||||
else:
|
||||
return content_type
|
||||
|
||||
@staticmethod
|
||||
def transform_instructions_to_system_message(
|
||||
instructions: Optional[str],
|
||||
) -> ChatCompletionSystemMessage:
|
||||
"""
|
||||
Transform instructions into a system message
|
||||
"""
|
||||
return ChatCompletionSystemMessage(role="system", content=instructions or "")
|
||||
|
||||
@staticmethod
|
||||
def transform_responses_api_tools_to_chat_completion_tools(
|
||||
tools: Optional[List[FunctionToolParam]],
|
||||
) -> List[ChatCompletionToolParam]:
|
||||
"""
|
||||
Transform Responses API tools into Chat Completion tools
|
||||
"""
|
||||
if tools is None:
|
||||
return []
|
||||
chat_completion_tools: List[ChatCompletionToolParam] = []
|
||||
for tool in tools:
|
||||
chat_completion_tools.append(
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name=tool["name"],
|
||||
description=tool.get("description") or "",
|
||||
parameters=tool.get("parameters", {}),
|
||||
strict=tool.get("strict", False),
|
||||
),
|
||||
)
|
||||
)
|
||||
return chat_completion_tools
|
||||
|
||||
@staticmethod
|
||||
def transform_chat_completion_tools_to_responses_tools(
|
||||
chat_completion_response: ModelResponse,
|
||||
) -> List[OutputFunctionToolCall]:
|
||||
"""
|
||||
Transform Chat Completion tool calls into Responses API tool calls
|
||||
"""
|
||||
all_chat_completion_tools: List[ChatCompletionMessageToolCall] = []
|
||||
for choice in chat_completion_response.choices:
|
||||
if isinstance(choice, Choices):
|
||||
if choice.message.tool_calls:
|
||||
all_chat_completion_tools.extend(choice.message.tool_calls)
|
||||
for tool_call in choice.message.tool_calls:
|
||||
TOOL_CALLS_CACHE.set_cache(
|
||||
key=tool_call.id,
|
||||
value=tool_call,
|
||||
)
|
||||
|
||||
responses_tools: List[OutputFunctionToolCall] = []
|
||||
for tool in all_chat_completion_tools:
|
||||
if tool.type == "function":
|
||||
function_definition = tool.function
|
||||
responses_tools.append(
|
||||
OutputFunctionToolCall(
|
||||
name=function_definition.name or "",
|
||||
arguments=function_definition.get("arguments") or "",
|
||||
call_id=tool.id or "",
|
||||
id=tool.id or "",
|
||||
type="function_call", # critical this is "function_call" to work with tools like openai codex
|
||||
status=function_definition.get("status") or "completed",
|
||||
)
|
||||
)
|
||||
return responses_tools
|
||||
|
||||
@staticmethod
|
||||
def transform_chat_completion_response_to_responses_api_response(
|
||||
request_input: Union[str, ResponseInputParam],
|
||||
responses_api_request: ResponsesAPIOptionalRequestParams,
|
||||
chat_completion_response: ModelResponse,
|
||||
) -> ResponsesAPIResponse:
|
||||
"""
|
||||
Transform a Chat Completion response into a Responses API response
|
||||
"""
|
||||
responses_api_response: ResponsesAPIResponse = ResponsesAPIResponse(
|
||||
id=chat_completion_response.id,
|
||||
created_at=chat_completion_response.created,
|
||||
model=chat_completion_response.model,
|
||||
object=chat_completion_response.object,
|
||||
error=getattr(chat_completion_response, "error", None),
|
||||
incomplete_details=getattr(
|
||||
chat_completion_response, "incomplete_details", None
|
||||
),
|
||||
instructions=getattr(chat_completion_response, "instructions", None),
|
||||
metadata=getattr(chat_completion_response, "metadata", {}),
|
||||
output=LiteLLMCompletionResponsesConfig._transform_chat_completion_choices_to_responses_output(
|
||||
chat_completion_response=chat_completion_response,
|
||||
choices=getattr(chat_completion_response, "choices", []),
|
||||
),
|
||||
parallel_tool_calls=getattr(
|
||||
chat_completion_response, "parallel_tool_calls", False
|
||||
),
|
||||
temperature=getattr(chat_completion_response, "temperature", 0),
|
||||
tool_choice=getattr(chat_completion_response, "tool_choice", "auto"),
|
||||
tools=getattr(chat_completion_response, "tools", []),
|
||||
top_p=getattr(chat_completion_response, "top_p", None),
|
||||
max_output_tokens=getattr(
|
||||
chat_completion_response, "max_output_tokens", None
|
||||
),
|
||||
previous_response_id=getattr(
|
||||
chat_completion_response, "previous_response_id", None
|
||||
),
|
||||
reasoning=Reasoning(),
|
||||
status=getattr(chat_completion_response, "status", "completed"),
|
||||
text=ResponseTextConfig(),
|
||||
truncation=getattr(chat_completion_response, "truncation", None),
|
||||
usage=LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
|
||||
chat_completion_response=chat_completion_response
|
||||
),
|
||||
user=getattr(chat_completion_response, "user", None),
|
||||
)
|
||||
|
||||
RESPONSES_API_SESSION_HANDLER.add_completed_response_to_cache(
|
||||
response_id=responses_api_response.id,
|
||||
session_element=ResponsesAPISessionElement(
|
||||
input=request_input,
|
||||
output=responses_api_response,
|
||||
response_id=responses_api_response.id,
|
||||
previous_response_id=responses_api_request.get("previous_response_id"),
|
||||
),
|
||||
)
|
||||
return responses_api_response
|
||||
|
||||
@staticmethod
|
||||
def _transform_chat_completion_choices_to_responses_output(
|
||||
chat_completion_response: ModelResponse,
|
||||
choices: List[Choices],
|
||||
) -> List[Union[GenericResponseOutputItem, OutputFunctionToolCall]]:
|
||||
responses_output: List[
|
||||
Union[GenericResponseOutputItem, OutputFunctionToolCall]
|
||||
] = []
|
||||
for choice in choices:
|
||||
responses_output.append(
|
||||
GenericResponseOutputItem(
|
||||
type="message",
|
||||
id=chat_completion_response.id,
|
||||
status=choice.finish_reason,
|
||||
role=choice.message.role,
|
||||
content=[
|
||||
LiteLLMCompletionResponsesConfig._transform_chat_message_to_response_output_text(
|
||||
choice.message
|
||||
)
|
||||
],
|
||||
)
|
||||
)
|
||||
|
||||
tool_calls = LiteLLMCompletionResponsesConfig.transform_chat_completion_tools_to_responses_tools(
|
||||
chat_completion_response=chat_completion_response
|
||||
)
|
||||
responses_output.extend(tool_calls)
|
||||
return responses_output
|
||||
|
||||
@staticmethod
|
||||
def _transform_responses_api_outputs_to_chat_completion_messages(
|
||||
responses_api_output: ResponsesAPIResponse,
|
||||
) -> List[
|
||||
Union[
|
||||
AllMessageValues,
|
||||
GenericChatCompletionMessage,
|
||||
ChatCompletionMessageToolCall,
|
||||
]
|
||||
]:
|
||||
messages: List[
|
||||
Union[
|
||||
AllMessageValues,
|
||||
GenericChatCompletionMessage,
|
||||
ChatCompletionMessageToolCall,
|
||||
]
|
||||
] = []
|
||||
output_items = responses_api_output.output
|
||||
for _output_item in output_items:
|
||||
output_item: dict = dict(_output_item)
|
||||
if output_item.get("type") == "function_call":
|
||||
# handle function call output
|
||||
messages.append(
|
||||
LiteLLMCompletionResponsesConfig._transform_responses_output_tool_call_to_chat_completion_output_tool_call(
|
||||
tool_call=output_item
|
||||
)
|
||||
)
|
||||
else:
|
||||
# transform as generic ResponseOutputItem
|
||||
messages.append(
|
||||
GenericChatCompletionMessage(
|
||||
role=str(output_item.get("role") or "user"),
|
||||
content=LiteLLMCompletionResponsesConfig._transform_responses_api_content_to_chat_completion_content(
|
||||
output_item.get("content")
|
||||
),
|
||||
)
|
||||
)
|
||||
return messages
|
||||
|
||||
@staticmethod
|
||||
def _transform_responses_output_tool_call_to_chat_completion_output_tool_call(
|
||||
tool_call: dict,
|
||||
) -> ChatCompletionMessageToolCall:
|
||||
return ChatCompletionMessageToolCall(
|
||||
id=tool_call.get("id") or "",
|
||||
type="function",
|
||||
function=Function(
|
||||
name=tool_call.get("name") or "",
|
||||
arguments=tool_call.get("arguments") or "",
|
||||
),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _transform_chat_message_to_response_output_text(
|
||||
message: Message,
|
||||
) -> OutputText:
|
||||
return OutputText(
|
||||
type="output_text",
|
||||
text=message.content,
|
||||
annotations=LiteLLMCompletionResponsesConfig._transform_chat_completion_annotations_to_response_output_annotations(
|
||||
annotations=getattr(message, "annotations", None)
|
||||
),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _transform_chat_completion_annotations_to_response_output_annotations(
|
||||
annotations: Optional[List[ChatCompletionAnnotation]],
|
||||
) -> List[GenericResponseOutputItemContentAnnotation]:
|
||||
response_output_annotations: List[
|
||||
GenericResponseOutputItemContentAnnotation
|
||||
] = []
|
||||
|
||||
if annotations is None:
|
||||
return response_output_annotations
|
||||
|
||||
for annotation in annotations:
|
||||
annotation_type = annotation.get("type")
|
||||
if annotation_type == "url_citation" and "url_citation" in annotation:
|
||||
url_citation = annotation["url_citation"]
|
||||
response_output_annotations.append(
|
||||
GenericResponseOutputItemContentAnnotation(
|
||||
type=annotation_type,
|
||||
start_index=url_citation.get("start_index"),
|
||||
end_index=url_citation.get("end_index"),
|
||||
url=url_citation.get("url"),
|
||||
title=url_citation.get("title"),
|
||||
)
|
||||
)
|
||||
# Handle other annotation types here
|
||||
|
||||
return response_output_annotations
|
||||
|
||||
@staticmethod
|
||||
def _transform_chat_completion_usage_to_responses_usage(
|
||||
chat_completion_response: ModelResponse,
|
||||
) -> ResponseAPIUsage:
|
||||
usage: Optional[Usage] = getattr(chat_completion_response, "usage", None)
|
||||
if usage is None:
|
||||
return ResponseAPIUsage(
|
||||
input_tokens=0,
|
||||
output_tokens=0,
|
||||
total_tokens=0,
|
||||
)
|
||||
return ResponseAPIUsage(
|
||||
input_tokens=usage.prompt_tokens,
|
||||
output_tokens=usage.completion_tokens,
|
||||
total_tokens=usage.total_tokens,
|
||||
)
|
|
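A hedged sketch of the request-side transformation defined above; the expected output is shown as an approximate shape, and `None`-valued keys are dropped.

```python
# Sketch only: turn a Responses API style request into chat-completion kwargs.
chat_request = LiteLLMCompletionResponsesConfig.transform_responses_api_request_to_chat_completion_request(
    model="gpt-4.1",
    input="What is the capital of France?",
    responses_api_request={"instructions": "Answer briefly.", "temperature": 0.2},
    custom_llm_provider="openai",
)

# Roughly:
# {
#   "messages": [
#       {"role": "system", "content": "Answer briefly."},
#       {"role": "user", "content": "What is the capital of France?"},
#   ],
#   "model": "gpt-4.1",
#   "tools": [],
#   "temperature": 0.2,
#   "custom_llm_provider": "openai",
# }
```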
@ -10,6 +10,9 @@ from litellm.constants import request_timeout
|
|||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
|
||||
from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
|
||||
from litellm.responses.litellm_completion_transformation.handler import (
|
||||
LiteLLMCompletionTransformationHandler,
|
||||
)
|
||||
from litellm.responses.utils import ResponsesAPIRequestUtils
|
||||
from litellm.types.llms.openai import (
|
||||
Reasoning,
|
||||
|
@ -29,6 +32,7 @@ from .streaming_iterator import BaseResponsesAPIStreamingIterator
|
|||
####### ENVIRONMENT VARIABLES ###################
|
||||
# Initialize any necessary instances or variables here
|
||||
base_llm_http_handler = BaseLLMHTTPHandler()
|
||||
litellm_completion_transformation_handler = LiteLLMCompletionTransformationHandler()
|
||||
#################################################
|
||||
|
||||
|
||||
|
@ -178,19 +182,12 @@ def responses(
|
|||
)
|
||||
|
||||
# get provider config
|
||||
responses_api_provider_config: Optional[
|
||||
BaseResponsesAPIConfig
|
||||
] = ProviderConfigManager.get_provider_responses_api_config(
|
||||
model=model,
|
||||
provider=litellm.LlmProviders(custom_llm_provider),
|
||||
)
|
||||
|
||||
if responses_api_provider_config is None:
|
||||
raise litellm.BadRequestError(
|
||||
responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
|
||||
ProviderConfigManager.get_provider_responses_api_config(
|
||||
model=model,
|
||||
llm_provider=custom_llm_provider,
|
||||
message=f"Responses API not available for custom_llm_provider={custom_llm_provider}, model: {model}",
|
||||
provider=litellm.LlmProviders(custom_llm_provider),
|
||||
)
|
||||
)
|
||||
|
||||
local_vars.update(kwargs)
|
||||
# Get ResponsesAPIOptionalRequestParams with only valid parameters
|
||||
|
@ -200,6 +197,17 @@ def responses(
|
|||
)
|
||||
)
|
||||
|
||||
if responses_api_provider_config is None:
|
||||
return litellm_completion_transformation_handler.response_api_handler(
|
||||
model=model,
|
||||
input=input,
|
||||
responses_api_request=response_api_optional_params,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
_is_async=_is_async,
|
||||
stream=stream,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
# Get optional parameters for the responses API
|
||||
responses_api_request_params: Dict = (
|
||||
ResponsesAPIRequestUtils.get_optional_params_responses_api(
|
||||
|
|
|
@ -60,7 +60,7 @@ class ResponsesAPIRequestUtils:
|
|||
|
||||
@staticmethod
|
||||
def get_requested_response_api_optional_param(
|
||||
params: Dict[str, Any]
|
||||
params: Dict[str, Any],
|
||||
) -> ResponsesAPIOptionalRequestParams:
|
||||
"""
|
||||
Filter parameters to only include those defined in ResponsesAPIOptionalRequestParams.
|
||||
|
@ -72,7 +72,9 @@ class ResponsesAPIRequestUtils:
|
|||
ResponsesAPIOptionalRequestParams instance with only the valid parameters
|
||||
"""
|
||||
valid_keys = get_type_hints(ResponsesAPIOptionalRequestParams).keys()
|
||||
filtered_params = {k: v for k, v in params.items() if k in valid_keys}
|
||||
filtered_params = {
|
||||
k: v for k, v in params.items() if k in valid_keys and v is not None
|
||||
}
|
||||
return cast(ResponsesAPIOptionalRequestParams, filtered_params)
|
||||
|
||||
|
||||
|
@ -88,7 +90,7 @@ class ResponseAPILoggingUtils:
|
|||
|
||||
@staticmethod
|
||||
def _transform_response_api_usage_to_chat_usage(
|
||||
usage: Union[dict, ResponseAPIUsage]
|
||||
usage: Union[dict, ResponseAPIUsage],
|
||||
) -> Usage:
|
||||
"""Tranforms the ResponseAPIUsage object to a Usage object"""
|
||||
response_api_usage: ResponseAPIUsage = (
|
||||
|
|
15
litellm/types/llms/base.py
Normal file
|
@ -0,0 +1,15 @@
|
|||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class BaseLiteLLMOpenAIResponseObject(BaseModel):
|
||||
def __getitem__(self, key):
|
||||
return self.__dict__[key]
|
||||
|
||||
def get(self, key, default=None):
|
||||
return self.__dict__.get(key, default)
|
||||
|
||||
def __contains__(self, key):
|
||||
return key in self.__dict__
|
||||
|
||||
def items(self):
|
||||
return self.__dict__.items()
|
|
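A hedged sketch of what this mixin buys: dict-style access on pydantic response objects. `DemoUsage` is a hypothetical subclass used only for illustration.

```python
from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject


class DemoUsage(BaseLiteLLMOpenAIResponseObject):  # hypothetical subclass
    input_tokens: int = 0
    output_tokens: int = 0


u = DemoUsage(input_tokens=10, output_tokens=5)
assert u["input_tokens"] == 10        # __getitem__
assert u.get("missing", 0) == 0       # .get with a default
assert "output_tokens" in u           # __contains__
print(dict(u.items()))                # {'input_tokens': 10, 'output_tokens': 5}
```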
@ -49,9 +49,16 @@ from openai.types.responses.response_create_params import (
|
|||
ToolChoice,
|
||||
ToolParam,
|
||||
)
|
||||
from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
|
||||
from pydantic import BaseModel, Discriminator, Field, PrivateAttr
|
||||
from typing_extensions import Annotated, Dict, Required, TypedDict, override
|
||||
|
||||
from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject
|
||||
from litellm.types.responses.main import (
|
||||
GenericResponseOutputItem,
|
||||
OutputFunctionToolCall,
|
||||
)
|
||||
|
||||
FileContent = Union[IO[bytes], bytes, PathLike]
|
||||
|
||||
FileTypes = Union[
|
||||
|
@ -678,6 +685,11 @@ class ChatCompletionDeveloperMessage(OpenAIChatCompletionDeveloperMessage, total
|
|||
cache_control: ChatCompletionCachedContent
|
||||
|
||||
|
||||
class GenericChatCompletionMessage(TypedDict, total=False):
|
||||
role: Required[str]
|
||||
content: Required[Union[str, List]]
|
||||
|
||||
|
||||
ValidUserMessageContentTypes = [
|
||||
"text",
|
||||
"image_url",
|
||||
|
@ -803,12 +815,12 @@ class OpenAIChatCompletionChunk(ChatCompletionChunk):
|
|||
|
||||
class Hyperparameters(BaseModel):
|
||||
batch_size: Optional[Union[str, int]] = None # "Number of examples in each batch."
|
||||
learning_rate_multiplier: Optional[
|
||||
Union[str, float]
|
||||
] = None # Scaling factor for the learning rate
|
||||
n_epochs: Optional[
|
||||
Union[str, int]
|
||||
] = None # "The number of epochs to train the model for"
|
||||
learning_rate_multiplier: Optional[Union[str, float]] = (
|
||||
None # Scaling factor for the learning rate
|
||||
)
|
||||
n_epochs: Optional[Union[str, int]] = (
|
||||
None # "The number of epochs to train the model for"
|
||||
)
|
||||
|
||||
|
||||
class FineTuningJobCreate(BaseModel):
|
||||
|
@ -835,18 +847,18 @@ class FineTuningJobCreate(BaseModel):
|
|||
|
||||
model: str # "The name of the model to fine-tune."
|
||||
training_file: str # "The ID of an uploaded file that contains training data."
|
||||
hyperparameters: Optional[
|
||||
Hyperparameters
|
||||
] = None # "The hyperparameters used for the fine-tuning job."
|
||||
suffix: Optional[
|
||||
str
|
||||
] = None # "A string of up to 18 characters that will be added to your fine-tuned model name."
|
||||
validation_file: Optional[
|
||||
str
|
||||
] = None # "The ID of an uploaded file that contains validation data."
|
||||
integrations: Optional[
|
||||
List[str]
|
||||
] = None # "A list of integrations to enable for your fine-tuning job."
|
||||
hyperparameters: Optional[Hyperparameters] = (
|
||||
None # "The hyperparameters used for the fine-tuning job."
|
||||
)
|
||||
suffix: Optional[str] = (
|
||||
None # "A string of up to 18 characters that will be added to your fine-tuned model name."
|
||||
)
|
||||
validation_file: Optional[str] = (
|
||||
None # "The ID of an uploaded file that contains validation data."
|
||||
)
|
||||
integrations: Optional[List[str]] = (
|
||||
None # "A list of integrations to enable for your fine-tuning job."
|
||||
)
|
||||
seed: Optional[int] = None # "The seed controls the reproducibility of the job."
|
||||
|
||||
|
||||
|
@ -887,7 +899,7 @@ class ResponsesAPIOptionalRequestParams(TypedDict, total=False):
|
|||
temperature: Optional[float]
|
||||
text: Optional[ResponseTextConfigParam]
|
||||
tool_choice: Optional[ToolChoice]
|
||||
tools: Optional[Iterable[ToolParam]]
|
||||
tools: Optional[List[ToolParam]]
|
||||
top_p: Optional[float]
|
||||
truncation: Optional[Literal["auto", "disabled"]]
|
||||
user: Optional[str]
|
||||
|
@ -900,20 +912,6 @@ class ResponsesAPIRequestParams(ResponsesAPIOptionalRequestParams, total=False):
|
|||
model: str
|
||||
|
||||
|
||||
class BaseLiteLLMOpenAIResponseObject(BaseModel):
|
||||
def __getitem__(self, key):
|
||||
return self.__dict__[key]
|
||||
|
||||
def get(self, key, default=None):
|
||||
return self.__dict__.get(key, default)
|
||||
|
||||
def __contains__(self, key):
|
||||
return key in self.__dict__
|
||||
|
||||
def items(self):
|
||||
return self.__dict__.items()
|
||||
|
||||
|
||||
class OutputTokensDetails(BaseLiteLLMOpenAIResponseObject):
|
||||
reasoning_tokens: Optional[int] = None
|
||||
|
||||
|
@ -958,11 +956,14 @@ class ResponsesAPIResponse(BaseLiteLLMOpenAIResponseObject):
|
|||
metadata: Optional[Dict]
|
||||
model: Optional[str]
|
||||
object: Optional[str]
|
||||
output: List[ResponseOutputItem]
|
||||
output: Union[
|
||||
List[ResponseOutputItem],
|
||||
List[Union[GenericResponseOutputItem, OutputFunctionToolCall]],
|
||||
]
|
||||
parallel_tool_calls: bool
|
||||
temperature: Optional[float]
|
||||
tool_choice: ToolChoice
|
||||
tools: List[Tool]
|
||||
tools: Union[List[Tool], List[ResponseFunctionToolCall]]
|
||||
top_p: Optional[float]
|
||||
max_output_tokens: Optional[int]
|
||||
previous_response_id: Optional[str]
|
||||
|
|
|
@ -69,6 +69,7 @@ class HttpxPartType(TypedDict, total=False):
|
|||
functionResponse: FunctionResponse
|
||||
executableCode: HttpxExecutableCode
|
||||
codeExecutionResult: HttpxCodeExecutionResult
|
||||
thought: bool
|
||||
|
||||
|
||||
class HttpxContentType(TypedDict, total=False):
|
||||
|
@ -166,6 +167,11 @@ class SafetSettingsConfig(TypedDict, total=False):
|
|||
method: HarmBlockMethod
|
||||
|
||||
|
||||
class GeminiThinkingConfig(TypedDict, total=False):
|
||||
includeThoughts: bool
|
||||
thinkingBudget: int
|
||||
|
||||
|
||||
class GenerationConfig(TypedDict, total=False):
|
||||
temperature: float
|
||||
top_p: float
|
||||
|
@ -181,6 +187,7 @@ class GenerationConfig(TypedDict, total=False):
|
|||
responseLogprobs: bool
|
||||
logprobs: int
|
||||
responseModalities: List[Literal["TEXT", "IMAGE", "AUDIO", "VIDEO"]]
|
||||
thinkingConfig: GeminiThinkingConfig
|
||||
|
||||
|
||||
class Tools(TypedDict, total=False):
|
||||
|
@ -212,6 +219,7 @@ class UsageMetadata(TypedDict, total=False):
|
|||
candidatesTokenCount: int
|
||||
cachedContentTokenCount: int
|
||||
promptTokensDetails: List[PromptTokensDetails]
|
||||
thoughtsTokenCount: int
|
||||
|
||||
|
||||
class CachedContent(TypedDict, total=False):
|
||||
|
|
|
@ -39,6 +39,7 @@ class KeyMetadata(BaseModel):
|
|||
"""Metadata for a key"""
|
||||
|
||||
key_alias: Optional[str] = None
|
||||
team_id: Optional[str] = None
|
||||
|
||||
|
||||
class KeyMetricWithMetadata(MetricBase):
|
||||
|
|
82
litellm/types/proxy/management_endpoints/scim_v2.py
Normal file
|
@ -0,0 +1,82 @@
|
|||
from typing import Any, Dict, List, Literal, Optional, Union
|
||||
|
||||
from fastapi import HTTPException
|
||||
from pydantic import BaseModel, EmailStr
|
||||
|
||||
|
||||
class LiteLLM_UserScimMetadata(BaseModel):
|
||||
"""
|
||||
Scim metadata stored in LiteLLM_UserTable.metadata
|
||||
"""
|
||||
|
||||
givenName: Optional[str] = None
|
||||
familyName: Optional[str] = None
|
||||
|
||||
|
||||
# SCIM Resource Models
|
||||
class SCIMResource(BaseModel):
|
||||
schemas: List[str]
|
||||
id: Optional[str] = None
|
||||
externalId: Optional[str] = None
|
||||
meta: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class SCIMUserName(BaseModel):
|
||||
familyName: str
|
||||
givenName: str
|
||||
formatted: Optional[str] = None
|
||||
middleName: Optional[str] = None
|
||||
honorificPrefix: Optional[str] = None
|
||||
honorificSuffix: Optional[str] = None
|
||||
|
||||
|
||||
class SCIMUserEmail(BaseModel):
|
||||
value: EmailStr
|
||||
type: Optional[str] = None
|
||||
primary: Optional[bool] = None
|
||||
|
||||
|
||||
class SCIMUserGroup(BaseModel):
|
||||
value: str # Group ID
|
||||
display: Optional[str] = None # Group display name
|
||||
type: Optional[str] = "direct" # direct or indirect
|
||||
|
||||
|
||||
class SCIMUser(SCIMResource):
|
||||
userName: str
|
||||
name: SCIMUserName
|
||||
displayName: Optional[str] = None
|
||||
active: bool = True
|
||||
emails: Optional[List[SCIMUserEmail]] = None
|
||||
groups: Optional[List[SCIMUserGroup]] = None
|
||||
|
||||
|
||||
class SCIMMember(BaseModel):
|
||||
value: str # User ID
|
||||
display: Optional[str] = None # Username or email
|
||||
|
||||
|
||||
class SCIMGroup(SCIMResource):
|
||||
displayName: str
|
||||
members: Optional[List[SCIMMember]] = None
|
||||
|
||||
|
||||
# SCIM List Response Models
|
||||
class SCIMListResponse(BaseModel):
|
||||
schemas: List[str] = ["urn:ietf:params:scim:api:messages:2.0:ListResponse"]
|
||||
totalResults: int
|
||||
startIndex: Optional[int] = 1
|
||||
itemsPerPage: Optional[int] = 10
|
||||
Resources: Union[List[SCIMUser], List[SCIMGroup]]
|
||||
|
||||
|
||||
# SCIM PATCH Operation Models
|
||||
class SCIMPatchOperation(BaseModel):
|
||||
op: Literal["add", "remove", "replace"]
|
||||
path: Optional[str] = None
|
||||
value: Optional[Any] = None
|
||||
|
||||
|
||||
class SCIMPatchOp(BaseModel):
|
||||
schemas: List[str] = ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]
|
||||
Operations: List[SCIMPatchOperation]
|
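A hedged sketch of building a SCIM user payload with the models above; the values are placeholders and pydantic v2's `model_dump` is assumed.

```python
user = SCIMUser(
    schemas=["urn:ietf:params:scim:schemas:core:2.0:User"],
    userName="jdoe",
    name=SCIMUserName(givenName="Jane", familyName="Doe"),
    emails=[SCIMUserEmail(value="jane@example.com", primary=True)],
    groups=[SCIMUserGroup(value="team-123", display="ml-platform")],
)
print(user.model_dump(exclude_none=True))  # pydantic v2 assumed
```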
48
litellm/types/responses/main.py
Normal file
|
@ -0,0 +1,48 @@
|
|||
from typing import Literal
|
||||
|
||||
from typing_extensions import Any, List, Optional, TypedDict
|
||||
|
||||
from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject
|
||||
|
||||
|
||||
class GenericResponseOutputItemContentAnnotation(BaseLiteLLMOpenAIResponseObject):
|
||||
"""Annotation for content in a message"""
|
||||
|
||||
type: Optional[str]
|
||||
start_index: Optional[int]
|
||||
end_index: Optional[int]
|
||||
url: Optional[str]
|
||||
title: Optional[str]
|
||||
pass
|
||||
|
||||
|
||||
class OutputText(BaseLiteLLMOpenAIResponseObject):
|
||||
"""Text output content from an assistant message"""
|
||||
|
||||
type: Optional[str] # "output_text"
|
||||
text: Optional[str]
|
||||
annotations: Optional[List[GenericResponseOutputItemContentAnnotation]]
|
||||
|
||||
|
||||
class OutputFunctionToolCall(BaseLiteLLMOpenAIResponseObject):
|
||||
"""A tool call to run a function"""
|
||||
|
||||
arguments: Optional[str]
|
||||
call_id: Optional[str]
|
||||
name: Optional[str]
|
||||
type: Optional[str] # "function_call"
|
||||
id: Optional[str]
|
||||
status: Literal["in_progress", "completed", "incomplete"]
|
||||
|
||||
|
||||
class GenericResponseOutputItem(BaseLiteLLMOpenAIResponseObject):
|
||||
"""
|
||||
Generic response API output item
|
||||
|
||||
"""
|
||||
|
||||
type: str # "message"
|
||||
id: str
|
||||
status: str # "completed", "in_progress", etc.
|
||||
role: str # "assistant", "user", etc.
|
||||
content: List[OutputText]
|
|
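A hedged sketch of assembling one output item with these models; ids and text are placeholders.

```python
item = GenericResponseOutputItem(
    type="message",
    id="chatcmpl-123",                  # placeholder response id
    status="completed",
    role="assistant",
    content=[
        OutputText(
            type="output_text",
            text="Paris is the capital of France.",
            annotations=[],
        )
    ],
)
```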
@ -1,3 +1,4 @@
|
|||
from datetime import datetime
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
@ -30,3 +31,23 @@ class TagDeleteRequest(BaseModel):
|
|||
|
||||
class TagInfoRequest(BaseModel):
|
||||
names: List[str]
|
||||
|
||||
|
||||
class LiteLLM_DailyTagSpendTable(BaseModel):
|
||||
id: str
|
||||
tag: str
|
||||
date: str
|
||||
api_key: str
|
||||
model: str
|
||||
model_group: Optional[str]
|
||||
custom_llm_provider: Optional[str]
|
||||
prompt_tokens: int
|
||||
completion_tokens: int
|
||||
cache_read_input_tokens: int
|
||||
cache_creation_input_tokens: int
|
||||
spend: float
|
||||
api_requests: int
|
||||
successful_requests: int
|
||||
failed_requests: int
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
|
|
@ -150,6 +150,7 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
|
|||
] # only for vertex ai models
|
||||
output_cost_per_image: Optional[float]
|
||||
output_vector_size: Optional[int]
|
||||
output_cost_per_reasoning_token: Optional[float]
|
||||
output_cost_per_video_per_second: Optional[float] # only for vertex ai models
|
||||
output_cost_per_audio_per_second: Optional[float] # only for vertex ai models
|
||||
output_cost_per_second: Optional[float] # for OpenAI Speech models
|
||||
|
@ -829,8 +830,11 @@ class Usage(CompletionUsage):
|
|||
# handle reasoning_tokens
|
||||
_completion_tokens_details: Optional[CompletionTokensDetailsWrapper] = None
|
||||
if reasoning_tokens:
|
||||
text_tokens = (
|
||||
completion_tokens - reasoning_tokens if completion_tokens else None
|
||||
)
|
||||
completion_tokens_details = CompletionTokensDetailsWrapper(
|
||||
reasoning_tokens=reasoning_tokens
|
||||
reasoning_tokens=reasoning_tokens, text_tokens=text_tokens
|
||||
)
|
||||
|
||||
# Ensure completion_tokens_details is properly handled
|
||||
|
|
|
@ -4563,6 +4563,9 @@ def _get_model_info_helper( # noqa: PLR0915
|
|||
output_cost_per_character=_model_info.get(
|
||||
"output_cost_per_character", None
|
||||
),
|
||||
output_cost_per_reasoning_token=_model_info.get(
|
||||
"output_cost_per_reasoning_token", None
|
||||
),
|
||||
output_cost_per_token_above_128k_tokens=_model_info.get(
|
||||
"output_cost_per_token_above_128k_tokens", None
|
||||
),
|
||||
|
@ -6602,6 +6605,8 @@ class ProviderConfigManager:
|
|||
) -> Optional[BaseResponsesAPIConfig]:
|
||||
if litellm.LlmProviders.OPENAI == provider:
|
||||
return litellm.OpenAIResponsesAPIConfig()
|
||||
elif litellm.LlmProviders.AZURE == provider:
|
||||
return litellm.AzureOpenAIResponsesAPIConfig()
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
|
|
|
@ -1471,6 +1471,64 @@
|
|||
"litellm_provider": "openai",
|
||||
"supported_endpoints": ["/v1/audio/speech"]
|
||||
},
|
||||
"azure/gpt-4.1": {
|
||||
"max_tokens": 32768,
|
||||
"max_input_tokens": 1047576,
|
||||
"max_output_tokens": 32768,
|
||||
"input_cost_per_token": 2e-6,
|
||||
"output_cost_per_token": 8e-6,
|
||||
"input_cost_per_token_batches": 1e-6,
|
||||
"output_cost_per_token_batches": 4e-6,
|
||||
"cache_read_input_token_cost": 0.5e-6,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat",
|
||||
"supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"],
|
||||
"supported_modalities": ["text", "image"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_vision": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_system_messages": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_native_streaming": true,
|
||||
"supports_web_search": true,
|
||||
"search_context_cost_per_query": {
|
||||
"search_context_size_low": 30e-3,
|
||||
"search_context_size_medium": 35e-3,
|
||||
"search_context_size_high": 50e-3
|
||||
}
|
||||
},
|
||||
"azure/gpt-4.1-2025-04-14": {
|
||||
"max_tokens": 32768,
|
||||
"max_input_tokens": 1047576,
|
||||
"max_output_tokens": 32768,
|
||||
"input_cost_per_token": 2e-6,
|
||||
"output_cost_per_token": 8e-6,
|
||||
"input_cost_per_token_batches": 1e-6,
|
||||
"output_cost_per_token_batches": 4e-6,
|
||||
"cache_read_input_token_cost": 0.5e-6,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat",
|
||||
"supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"],
|
||||
"supported_modalities": ["text", "image"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_vision": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_system_messages": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_native_streaming": true,
|
||||
"supports_web_search": true,
|
||||
"search_context_cost_per_query": {
|
||||
"search_context_size_low": 30e-3,
|
||||
"search_context_size_medium": 35e-3,
|
||||
"search_context_size_high": 50e-3
|
||||
}
|
||||
},
|
||||
"azure/gpt-4o-mini-realtime-preview-2024-12-17": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 128000,
|
||||
|
@ -1647,6 +1705,23 @@
|
|||
"supports_system_messages": true,
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"azure/o4-mini-2025-04-16": {
|
||||
"max_tokens": 100000,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 100000,
|
||||
"input_cost_per_token": 1.1e-6,
|
||||
"output_cost_per_token": 4.4e-6,
|
||||
"cache_read_input_token_cost": 2.75e-7,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": false,
|
||||
"supports_vision": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"azure/o3-mini-2025-01-31": {
|
||||
"max_tokens": 100000,
|
||||
"max_input_tokens": 200000,
|
||||
|
@ -5093,6 +5168,64 @@
|
|||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"gemini/gemini-2.5-flash-preview-04-17": {
|
||||
"max_tokens": 65536,
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 65536,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_videos_per_prompt": 10,
|
||||
"max_video_length": 1,
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_audio_token": 1e-6,
|
||||
"input_cost_per_token": 0.15e-6,
|
||||
"output_cost_per_token": 0.6e-6,
|
||||
"output_cost_per_reasoning_token": 3.5e-6,
|
||||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"rpm": 10,
|
||||
"tpm": 250000,
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": false,
|
||||
"supports_tool_choice": true,
|
||||
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview"
|
||||
},
|
||||
"gemini-2.5-flash-preview-04-17": {
|
||||
"max_tokens": 65536,
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 65536,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_videos_per_prompt": 10,
|
||||
"max_video_length": 1,
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_audio_token": 1e-6,
|
||||
"input_cost_per_token": 0.15e-6,
|
||||
"output_cost_per_token": 0.6e-6,
|
||||
"output_cost_per_reasoning_token": 3.5e-6,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_reasoning": true,
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": false,
|
||||
"supports_tool_choice": true,
|
||||
"supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"],
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview"
|
||||
},
|
||||
"gemini-2.0-flash": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1048576,
|
||||
|
|
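As a hedged sanity check on the `gemini-2.5-flash-preview-04-17` pricing added above, the split output cost works out as follows (token counts borrowed from the reasoning-token test later in this diff):

```python
input_cost_per_token = 0.15e-6
output_cost_per_token = 0.6e-6
output_cost_per_reasoning_token = 3.5e-6

prompt_tokens, text_tokens, reasoning_tokens = 17, 626, 952

prompt_cost = prompt_tokens * input_cost_per_token                 # 2.55e-06 USD
completion_cost = (
    text_tokens * output_cost_per_token                            # 3.756e-04
    + reasoning_tokens * output_cost_per_reasoning_token           # 3.332e-03
)                                                                  # ~3.7076e-03 USD
```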
6
poetry.lock
generated
|
@ -1780,14 +1780,14 @@ referencing = ">=0.31.0"
|
|||
|
||||
[[package]]
|
||||
name = "litellm-proxy-extras"
|
||||
version = "0.1.9"
|
||||
version = "0.1.11"
|
||||
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
|
||||
optional = true
|
||||
python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
|
||||
groups = ["main"]
|
||||
markers = "extra == \"proxy\""
|
||||
files = [
|
||||
{file = "litellm_proxy_extras-0.1.9.tar.gz", hash = "sha256:e05966734c30f28a54203eca50172983a62b2748c5f5c4102ad81967efc1748d"},
|
||||
{file = "litellm_proxy_extras-0.1.11.tar.gz", hash = "sha256:9c170209b2f0b64c16aebe9a35866fa457142aee5fab51476e3b4b76d9ca4e9e"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -4384,4 +4384,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "boto3", "cryptography", "fastapi",
|
|||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.8.1,<4.0, !=3.9.7"
|
||||
content-hash = "6654224913bf322c344071bc9e418bf5e1bb61969a82a69ba7bd6cf1988891f8"
|
||||
content-hash = "40074b2e47aae8ece058be9a42eda3ca0618e27e4fc9d6529793816df7adb6c8"
|
||||
|
|
|
@ -7,13 +7,13 @@ model_list:
|
|||
id: "1"
|
||||
- model_name: gpt-3.5-turbo-end-user-test
|
||||
litellm_params:
|
||||
model: azure/chatgpt-v-2
|
||||
model: azure/chatgpt-v-3
|
||||
api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
|
||||
api_version: "2023-05-15"
|
||||
api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: azure/chatgpt-v-2
|
||||
model: azure/chatgpt-v-3
|
||||
api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
|
||||
api_version: "2023-05-15"
|
||||
api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
|
||||
|
@ -26,7 +26,7 @@ model_list:
|
|||
stream_timeout: 60
|
||||
- model_name: gpt-4
|
||||
litellm_params:
|
||||
model: azure/chatgpt-v-2
|
||||
model: azure/chatgpt-v-3
|
||||
api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
|
||||
api_version: "2023-05-15"
|
||||
api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "litellm"
|
||||
version = "1.66.2"
|
||||
version = "1.66.3"
|
||||
description = "Library to easily interface with LLM API providers"
|
||||
authors = ["BerriAI"]
|
||||
license = "MIT"
|
||||
|
@ -55,7 +55,7 @@ websockets = {version = "^13.1.0", optional = true}
|
|||
boto3 = {version = "1.34.34", optional = true}
|
||||
redisvl = {version = "^0.4.1", optional = true, markers = "python_version >= '3.9' and python_version < '3.14'"}
|
||||
mcp = {version = "1.5.0", optional = true, python = ">=3.10"}
|
||||
litellm-proxy-extras = {version = "0.1.9", optional = true}
|
||||
litellm-proxy-extras = {version = "0.1.11", optional = true}
|
||||
|
||||
[tool.poetry.extras]
|
||||
proxy = [
|
||||
|
@ -118,7 +118,7 @@ requires = ["poetry-core", "wheel"]
|
|||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.commitizen]
|
||||
version = "1.66.2"
|
||||
version = "1.66.3"
|
||||
version_files = [
|
||||
"pyproject.toml:^version"
|
||||
]
|
||||
|
|
|
@ -37,7 +37,7 @@ sentry_sdk==2.21.0 # for sentry error handling
|
|||
detect-secrets==1.5.0 # Enterprise - secret detection / masking in LLM requests
|
||||
cryptography==43.0.1
|
||||
tzdata==2025.1 # IANA time zone database
|
||||
litellm-proxy-extras==0.1.9 # for proxy extras - e.g. prisma migrations
|
||||
litellm-proxy-extras==0.1.11 # for proxy extras - e.g. prisma migrations
|
||||
### LITELLM PACKAGE DEPENDENCIES
|
||||
python-dotenv==1.0.0 # for env
|
||||
tiktoken==0.8.0 # for calculating usage
|
||||
|
|
|
@ -169,6 +169,7 @@ model LiteLLM_VerificationToken {
|
|||
budget_duration String?
|
||||
budget_reset_at DateTime?
|
||||
allowed_cache_controls String[] @default([])
|
||||
allowed_routes String[] @default([])
|
||||
model_spend Json @default("{}")
|
||||
model_max_budget Json @default("{}")
|
||||
budget_id String?
|
||||
|
|
|
@@ -10,7 +10,13 @@ from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import (
    StandardBuiltInToolCostTracking,
)
from litellm.types.llms.openai import FileSearchTool, WebSearchOptions
-from litellm.types.utils import ModelInfo, ModelResponse, StandardBuiltInToolsParams
+from litellm.types.utils import (
+    CompletionTokensDetailsWrapper,
+    ModelInfo,
+    ModelResponse,
+    PromptTokensDetailsWrapper,
+    StandardBuiltInToolsParams,
+)

sys.path.insert(
    0, os.path.abspath("../../..")

@@ -20,6 +26,51 @@ from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token
from litellm.types.utils import Usage


def test_reasoning_tokens_gemini():
    model = "gemini-2.5-flash-preview-04-17"
    custom_llm_provider = "gemini"
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    usage = Usage(
        completion_tokens=1578,
        prompt_tokens=17,
        total_tokens=1595,
        completion_tokens_details=CompletionTokensDetailsWrapper(
            accepted_prediction_tokens=None,
            audio_tokens=None,
            reasoning_tokens=952,
            rejected_prediction_tokens=None,
            text_tokens=626,
        ),
        prompt_tokens_details=PromptTokensDetailsWrapper(
            audio_tokens=None, cached_tokens=None, text_tokens=17, image_tokens=None
        ),
    )
    model_cost_map = litellm.model_cost[model]
    prompt_cost, completion_cost = generic_cost_per_token(
        model=model,
        usage=usage,
        custom_llm_provider=custom_llm_provider,
    )

    assert round(prompt_cost, 10) == round(
        model_cost_map["input_cost_per_token"] * usage.prompt_tokens,
        10,
    )
    assert round(completion_cost, 10) == round(
        (
            model_cost_map["output_cost_per_token"]
            * usage.completion_tokens_details.text_tokens
        )
        + (
            model_cost_map["output_cost_per_reasoning_token"]
            * usage.completion_tokens_details.reasoning_tokens
        ),
        10,
    )


def test_generic_cost_per_token_above_200k_tokens():
    model = "gemini-2.5-pro-exp-03-25"
    custom_llm_provider = "vertex_ai"
@@ -201,13 +201,25 @@ class TestOpenAIResponsesAPIConfig:
        # Test with provided API base
        api_base = "https://custom-openai.example.com/v1"

-        result = self.config.get_complete_url(api_base=api_base, model=self.model)
+        result = self.config.get_complete_url(
+            api_base=api_base,
+            model=self.model,
+            api_key="test_api_key",
+            optional_params={},
+            litellm_params={},
+        )

        assert result == "https://custom-openai.example.com/v1/responses"

        # Test with litellm.api_base
        with patch("litellm.api_base", "https://litellm-api-base.example.com/v1"):
-            result = self.config.get_complete_url(api_base=None, model=self.model)
+            result = self.config.get_complete_url(
+                api_base=None,
+                model=self.model,
+                api_key="test_api_key",
+                optional_params={},
+                litellm_params={},
+            )

            assert result == "https://litellm-api-base.example.com/v1/responses"

@@ -217,7 +229,13 @@ class TestOpenAIResponsesAPIConfig:
            "litellm.llms.openai.responses.transformation.get_secret_str",
            return_value="https://env-api-base.example.com/v1",
        ):
-            result = self.config.get_complete_url(api_base=None, model=self.model)
+            result = self.config.get_complete_url(
+                api_base=None,
+                model=self.model,
+                api_key="test_api_key",
+                optional_params={},
+                litellm_params={},
+            )

            assert result == "https://env-api-base.example.com/v1/responses"

@@ -227,13 +245,25 @@ class TestOpenAIResponsesAPIConfig:
            "litellm.llms.openai.responses.transformation.get_secret_str",
            return_value=None,
        ):
-            result = self.config.get_complete_url(api_base=None, model=self.model)
+            result = self.config.get_complete_url(
+                api_base=None,
+                model=self.model,
+                api_key="test_api_key",
+                optional_params={},
+                litellm_params={},
+            )

            assert result == "https://api.openai.com/v1/responses"

        # Test with trailing slash in API base
        api_base = "https://custom-openai.example.com/v1/"

-        result = self.config.get_complete_url(api_base=api_base, model=self.model)
+        result = self.config.get_complete_url(
+            api_base=api_base,
+            model=self.model,
+            api_key="test_api_key",
+            optional_params={},
+            litellm_params={},
+        )

        assert result == "https://custom-openai.example.com/v1/responses"
@@ -1,7 +1,9 @@
import asyncio
from typing import List, cast
from unittest.mock import MagicMock

import pytest
from pydantic import BaseModel

import litellm
from litellm import ModelResponse

@@ -9,8 +11,6 @@ from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
    VertexGeminiConfig,
)
from litellm.types.utils import ChoiceLogprobs
from pydantic import BaseModel
from typing import List, cast


def test_top_logprobs():

@@ -66,7 +66,6 @@ def test_get_model_name_from_gemini_spec_model():
    assert result == "ft-uuid-123"


def test_vertex_ai_response_schema_dict():
    v = VertexGeminiConfig()
    transformed_request = v.map_openai_params(

@@ -221,3 +220,22 @@ def test_vertex_ai_retain_property_ordering():
    schema = transformed_request["response_schema"]
    # should leave existing value alone, despite dictionary ordering
    assert schema["propertyOrdering"] == ["thought", "output"]


def test_vertex_ai_thinking_output_part():
    from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
        VertexGeminiConfig,
    )
    from litellm.types.llms.vertex_ai import HttpxPartType

    v = VertexGeminiConfig()
    parts = [
        HttpxPartType(
            thought=True,
            text="I'm thinking...",
        ),
        HttpxPartType(text="Hello world"),
    ]
    content, reasoning_content = v.get_assistant_content_message(parts=parts)
    assert content == "Hello world"
    assert reasoning_content == "I'm thinking..."
@@ -0,0 +1,225 @@
import asyncio
import json
import os
import sys
import uuid
from typing import Optional, cast
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from fastapi import HTTPException
from fastapi.testclient import TestClient

sys.path.insert(
    0, os.path.abspath("../../../")
)  # Adds the parent directory to the system path

from litellm.proxy._types import LiteLLM_TeamTable, LiteLLM_UserTable, Member
from litellm.proxy.management_endpoints.scim.scim_transformations import (
    ScimTransformations,
)
from litellm.types.proxy.management_endpoints.scim_v2 import SCIMGroup, SCIMUser


# Mock data
@pytest.fixture
def mock_user():
    return LiteLLM_UserTable(
        user_id="user-123",
        user_email="test@example.com",
        user_alias="Test User",
        teams=["team-1", "team-2"],
        created_at=None,
        updated_at=None,
        metadata={},
    )


@pytest.fixture
def mock_user_with_scim_metadata():
    return LiteLLM_UserTable(
        user_id="user-456",
        user_email="test2@example.com",
        user_alias="Test User 2",
        teams=["team-1"],
        created_at=None,
        updated_at=None,
        metadata={"scim_metadata": {"givenName": "Test", "familyName": "User"}},
    )


@pytest.fixture
def mock_user_minimal():
    return LiteLLM_UserTable(
        user_id="user-789",
        user_email=None,
        user_alias=None,
        teams=[],
        created_at=None,
        updated_at=None,
        metadata={},
    )


@pytest.fixture
def mock_team():
    return LiteLLM_TeamTable(
        team_id="team-1",
        team_alias="Test Team",
        members_with_roles=[
            Member(user_id="user-123", user_email="test@example.com", role="admin"),
            Member(user_id="user-456", user_email="test2@example.com", role="user"),
        ],
        created_at=None,
        updated_at=None,
    )


@pytest.fixture
def mock_team_minimal():
    return LiteLLM_TeamTable(
        team_id="team-2",
        team_alias="Test Team 2",
        members_with_roles=[Member(user_id="user-789", user_email=None, role="user")],
        created_at=None,
        updated_at=None,
    )


@pytest.fixture
def mock_prisma_client():
    mock_client = MagicMock()
    mock_db = MagicMock()
    mock_client.db = mock_db

    mock_find_unique = AsyncMock()
    mock_db.litellm_teamtable.find_unique = mock_find_unique

    return mock_client, mock_find_unique


class TestScimTransformations:
    @pytest.mark.asyncio
    async def test_transform_litellm_user_to_scim_user(
        self, mock_user, mock_prisma_client
    ):
        mock_client, mock_find_unique = mock_prisma_client

        # Mock the team lookup
        team1 = LiteLLM_TeamTable(
            team_id="team-1", team_alias="Team One", members_with_roles=[]
        )
        team2 = LiteLLM_TeamTable(
            team_id="team-2", team_alias="Team Two", members_with_roles=[]
        )

        mock_find_unique.side_effect = [team1, team2]

        with patch("litellm.proxy.proxy_server.prisma_client", mock_client):
            scim_user = await ScimTransformations.transform_litellm_user_to_scim_user(
                mock_user
            )

            assert scim_user.id == mock_user.user_id
            assert scim_user.userName == mock_user.user_email
            assert scim_user.displayName == mock_user.user_email
            assert scim_user.name.familyName == mock_user.user_alias
            assert scim_user.name.givenName == mock_user.user_alias
            assert len(scim_user.emails) == 1
            assert scim_user.emails[0].value == mock_user.user_email
            assert len(scim_user.groups) == 2
            assert scim_user.groups[0].value == "team-1"
            assert scim_user.groups[0].display == "Team One"
            assert scim_user.groups[1].value == "team-2"
            assert scim_user.groups[1].display == "Team Two"

    @pytest.mark.asyncio
    async def test_transform_user_with_scim_metadata(
        self, mock_user_with_scim_metadata, mock_prisma_client
    ):
        mock_client, mock_find_unique = mock_prisma_client

        # Mock the team lookup
        team1 = LiteLLM_TeamTable(
            team_id="team-1", team_alias="Team One", members_with_roles=[]
        )
        mock_find_unique.return_value = team1

        with patch("litellm.proxy.proxy_server.prisma_client", mock_client):
            scim_user = await ScimTransformations.transform_litellm_user_to_scim_user(
                mock_user_with_scim_metadata
            )

            assert scim_user.name.givenName == "Test"
            assert scim_user.name.familyName == "User"

    @pytest.mark.asyncio
    async def test_transform_litellm_team_to_scim_group(
        self, mock_team, mock_prisma_client
    ):
        mock_client, _ = mock_prisma_client

        with patch("litellm.proxy.proxy_server.prisma_client", mock_client):
            scim_group = await ScimTransformations.transform_litellm_team_to_scim_group(
                mock_team
            )

            assert scim_group.id == mock_team.team_id
            assert scim_group.displayName == mock_team.team_alias
            assert len(scim_group.members) == 2
            assert scim_group.members[0].value == "test@example.com"
            assert scim_group.members[0].display == "test@example.com"
            assert scim_group.members[1].value == "test2@example.com"
            assert scim_group.members[1].display == "test2@example.com"

    def test_get_scim_user_name(self, mock_user, mock_user_minimal):
        # User with email
        result = ScimTransformations._get_scim_user_name(mock_user)
        assert result == mock_user.user_email

        # User without email
        result = ScimTransformations._get_scim_user_name(mock_user_minimal)
        assert result == ScimTransformations.DEFAULT_SCIM_DISPLAY_NAME

    def test_get_scim_family_name(
        self, mock_user, mock_user_with_scim_metadata, mock_user_minimal
    ):
        # User with alias
        result = ScimTransformations._get_scim_family_name(mock_user)
        assert result == mock_user.user_alias

        # User with SCIM metadata
        result = ScimTransformations._get_scim_family_name(mock_user_with_scim_metadata)
        assert result == "User"

        # User without alias or metadata
        result = ScimTransformations._get_scim_family_name(mock_user_minimal)
        assert result == ScimTransformations.DEFAULT_SCIM_FAMILY_NAME

    def test_get_scim_given_name(
        self, mock_user, mock_user_with_scim_metadata, mock_user_minimal
    ):
        # User with alias
        result = ScimTransformations._get_scim_given_name(mock_user)
        assert result == mock_user.user_alias

        # User with SCIM metadata
        result = ScimTransformations._get_scim_given_name(mock_user_with_scim_metadata)
        assert result == "Test"

        # User without alias or metadata
        result = ScimTransformations._get_scim_given_name(mock_user_minimal)
        assert result == ScimTransformations.DEFAULT_SCIM_NAME

    def test_get_scim_member_value(self):
        # Member with email
        member_with_email = Member(
            user_id="user-123", user_email="test@example.com", role="admin"
        )
        result = ScimTransformations._get_scim_member_value(member_with_email)
        assert result == member_with_email.user_email

        # Member without email
        member_without_email = Member(user_id="user-456", user_email=None, role="user")
        result = ScimTransformations._get_scim_member_value(member_without_email)
        assert result == ScimTransformations.DEFAULT_SCIM_MEMBER_VALUE
@@ -67,7 +67,7 @@ def test_user_daily_activity_types():
    """
    Assert all fiels in SpendMetrics are reported in DailySpendMetadata as "total_"
    """
-    from litellm.proxy.management_endpoints.internal_user_endpoints import (
+    from litellm.proxy.management_endpoints.common_daily_activity import (
        DailySpendMetadata,
        SpendMetrics,
    )
@@ -19,13 +19,13 @@ from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import (
    BaseOpenAIPassThroughHandler,
    RouteChecks,
    create_pass_through_route,
    vertex_discovery_proxy_route,
    vertex_proxy_route,
)
from litellm.types.passthrough_endpoints.vertex_ai import VertexPassThroughCredentials


class TestBaseOpenAIPassThroughHandler:

    def test_join_url_paths(self):
        print("\nTesting _join_url_paths method...")

@@ -456,3 +456,120 @@ class TestVertexAIPassThroughHandler:
                mock_auth.assert_called_once()
                call_args = mock_auth.call_args[1]
                assert call_args["api_key"] == "Bearer test-key-123"


class TestVertexAIDiscoveryPassThroughHandler:
    """
    Test cases for Vertex AI Discovery passthrough endpoint
    """

    @pytest.mark.asyncio
    async def test_vertex_discovery_passthrough_with_credentials(self, monkeypatch):
        """
        Test that when passthrough credentials are set, they are correctly used in the request
        """
        from litellm.proxy.pass_through_endpoints.passthrough_endpoint_router import (
            PassthroughEndpointRouter,
        )

        vertex_project = "test-project"
        vertex_location = "us-central1"
        vertex_credentials = "test-creds"

        pass_through_router = PassthroughEndpointRouter()

        pass_through_router.add_vertex_credentials(
            project_id=vertex_project,
            location=vertex_location,
            vertex_credentials=vertex_credentials,
        )

        monkeypatch.setattr(
            "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router",
            pass_through_router,
        )

        endpoint = f"/v1/projects/{vertex_project}/locations/{vertex_location}/dataStores/default/servingConfigs/default:search"

        # Mock request
        mock_request = Request(
            scope={
                "type": "http",
                "method": "POST",
                "path": endpoint,
                "headers": [
                    (b"Authorization", b"Bearer test-creds"),
                    (b"Content-Type", b"application/json"),
                ],
            }
        )

        # Mock response
        mock_response = Response()

        # Mock vertex credentials
        test_project = vertex_project
        test_location = vertex_location
        test_token = vertex_credentials

        with mock.patch(
            "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._ensure_access_token_async"
        ) as mock_ensure_token, mock.patch(
            "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._get_token_and_url"
        ) as mock_get_token, mock.patch(
            "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route"
        ) as mock_create_route:
            mock_ensure_token.return_value = ("test-auth-header", test_project)
            mock_get_token.return_value = (test_token, "")

            # Call the route
            try:
                await vertex_discovery_proxy_route(
                    endpoint=endpoint,
                    request=mock_request,
                    fastapi_response=mock_response,
                )
            except Exception as e:
                print(f"Error: {e}")

            # Verify create_pass_through_route was called with correct arguments
            mock_create_route.assert_called_once_with(
                endpoint=endpoint,
                target=f"https://discoveryengine.googleapis.com/v1/projects/{test_project}/locations/{test_location}/dataStores/default/servingConfigs/default:search",
                custom_headers={"Authorization": f"Bearer {test_token}"},
            )

    @pytest.mark.asyncio
    async def test_vertex_discovery_proxy_route_api_key_auth(self):
        """
        Test that the route correctly handles API key authentication
        """
        # Mock dependencies
        mock_request = Mock()
        mock_request.headers = {"x-litellm-api-key": "test-key-123"}
        mock_request.method = "POST"
        mock_response = Mock()

        with patch(
            "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.user_api_key_auth"
        ) as mock_auth:
            mock_auth.return_value = {"api_key": "test-key-123"}

            with patch(
                "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route"
            ) as mock_pass_through:
                mock_pass_through.return_value = AsyncMock(
                    return_value={"status": "success"}
                )

                # Call the function
                result = await vertex_discovery_proxy_route(
                    endpoint="v1/projects/test-project/locations/us-central1/dataStores/default/servingConfigs/default:search",
                    request=mock_request,
                    fastapi_response=mock_response,
                )

                # Verify user_api_key_auth was called with the correct Bearer token
                mock_auth.assert_called_once()
                call_args = mock_auth.call_args[1]
                assert call_args["api_key"] == "Bearer test-key-123"
@@ -20,6 +20,16 @@ from litellm.proxy.hooks.proxy_track_cost_callback import _ProxyDBLogger
from litellm.proxy.proxy_server import app, prisma_client
from litellm.router import Router

ignored_keys = [
    "request_id",
    "startTime",
    "endTime",
    "completionStartTime",
    "endTime",
    "metadata.model_map_information",
    "metadata.usage_object",
]


@pytest.fixture
def client():

@@ -457,7 +467,7 @@ class TestSpendLogsPayload:
                "model": "gpt-4o",
                "user": "",
                "team_id": "",
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 20, "prompt_tokens": 10, "total_tokens": 30, "completion_tokens_details": null, "prompt_tokens_details": null}, "model_map_information": {"model_map_key": "gpt-4o", "model_map_value": {"key": "gpt-4o", "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, "input_cost_per_token": 2.5e-06, "cache_creation_input_token_cost": null, "cache_read_input_token_cost": 1.25e-06, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_token_above_200k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": 1.25e-06, "output_cost_per_token_batches": 5e-06, "output_cost_per_token": 1e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_token_above_200k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "openai", "mode": "chat", "supports_system_messages": true, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": false, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": false, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": true, "supports_reasoning": false, "search_context_cost_per_query": {"search_context_size_low": 0.03, "search_context_size_medium": 0.035, "search_context_size_high": 0.05}, "tpm": null, "rpm": null, "supported_openai_params": ["frequency_penalty", "logit_bias", "logprobs", "top_logprobs", "max_tokens", "max_completion_tokens", "modalities", "prediction", "n", "presence_penalty", "seed", "stop", "stream", "stream_options", "temperature", "top_p", "tools", "tool_choice", "function_call", "functions", "max_retries", "extra_headers", "parallel_tool_calls", "audio", "response_format", "user"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": null}}',
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 20, "prompt_tokens": 10, "total_tokens": 30, "completion_tokens_details": null, "prompt_tokens_details": null}, "model_map_information": {"model_map_key": "gpt-4o", "model_map_value": {"key": "gpt-4o", "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, "input_cost_per_token": 2.5e-06, "cache_creation_input_token_cost": null, "cache_read_input_token_cost": 1.25e-06, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_token_above_200k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": 1.25e-06, "output_cost_per_token_batches": 5e-06, "output_cost_per_token": 1e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_token_above_200k_tokens": null, "output_cost_per_second": null, "output_cost_per_reasoning_token": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "openai", "mode": "chat", "supports_system_messages": true, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": false, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": false, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": true, "supports_reasoning": false, "search_context_cost_per_query": {"search_context_size_low": 0.03, "search_context_size_medium": 0.035, "search_context_size_high": 0.05}, "tpm": null, "rpm": null, "supported_openai_params": ["frequency_penalty", "logit_bias", "logprobs", "top_logprobs", "max_tokens", "max_completion_tokens", "modalities", "prediction", "n", "presence_penalty", "seed", "stop", "stream", "stream_options", "temperature", "top_p", "tools", "tool_choice", "function_call", "functions", "max_retries", "extra_headers", "parallel_tool_calls", "audio", "response_format", "user"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": null}}',
                "cache_key": "Cache OFF",
                "spend": 0.00022500000000000002,
                "total_tokens": 30,

@@ -475,19 +485,11 @@ class TestSpendLogsPayload:
            }
        )

-        for key, value in expected_payload.items():
-            if key in [
-                "request_id",
-                "startTime",
-                "endTime",
-                "completionStartTime",
-                "endTime",
-            ]:
-                assert payload[key] is not None
-            else:
-                assert (
-                    payload[key] == value
-                ), f"Expected {key} to be {value}, but got {payload[key]}"
+        differences = _compare_nested_dicts(
+            payload, expected_payload, ignore_keys=ignored_keys
+        )
+        if differences:
+            assert False, f"Dictionary mismatch: {differences}"

    def mock_anthropic_response(*args, **kwargs):
        mock_response = MagicMock()

@@ -573,19 +575,11 @@ class TestSpendLogsPayload:
            }
        )

-        for key, value in expected_payload.items():
-            if key in [
-                "request_id",
-                "startTime",
-                "endTime",
-                "completionStartTime",
-                "endTime",
-            ]:
-                assert payload[key] is not None
-            else:
-                assert (
-                    payload[key] == value
-                ), f"Expected {key} to be {value}, but got {payload[key]}"
+        differences = _compare_nested_dicts(
+            payload, expected_payload, ignore_keys=ignored_keys
+        )
+        if differences:
+            assert False, f"Dictionary mismatch: {differences}"

    @pytest.mark.asyncio
    async def test_spend_logs_payload_success_log_with_router(self):

@@ -669,16 +663,71 @@ class TestSpendLogsPayload:
            }
        )

-        for key, value in expected_payload.items():
-            if key in [
-                "request_id",
-                "startTime",
-                "endTime",
-                "completionStartTime",
-                "endTime",
-            ]:
-                assert payload[key] is not None
-            else:
-                assert (
-                    payload[key] == value
-                ), f"Expected {key} to be {value}, but got {payload[key]}"
+        differences = _compare_nested_dicts(
+            payload, expected_payload, ignore_keys=ignored_keys
+        )
+        if differences:
+            assert False, f"Dictionary mismatch: {differences}"


def _compare_nested_dicts(
    actual: dict, expected: dict, path: str = "", ignore_keys: list[str] = []
) -> list[str]:
    """Compare nested dictionaries and return a list of differences in a human-friendly format."""
    differences = []

    # Check if current path should be ignored
    if path in ignore_keys:
        return differences

    # Check for keys in actual but not in expected
    for key in actual.keys():
        current_path = f"{path}.{key}" if path else key
        if current_path not in ignore_keys and key not in expected:
            differences.append(f"Extra key in actual: {current_path}")

    for key, expected_value in expected.items():
        current_path = f"{path}.{key}" if path else key
        if current_path in ignore_keys:
            continue
        if key not in actual:
            differences.append(f"Missing key: {current_path}")
            continue

        actual_value = actual[key]

        # Try to parse JSON strings
        if isinstance(expected_value, str):
            try:
                expected_value = json.loads(expected_value)
            except json.JSONDecodeError:
                pass
        if isinstance(actual_value, str):
            try:
                actual_value = json.loads(actual_value)
            except json.JSONDecodeError:
                pass

        if isinstance(expected_value, dict) and isinstance(actual_value, dict):
            differences.extend(
                _compare_nested_dicts(
                    actual_value, expected_value, current_path, ignore_keys
                )
            )
        elif isinstance(expected_value, dict) or isinstance(actual_value, dict):
            differences.append(
                f"Type mismatch at {current_path}: expected dict, got {type(actual_value).__name__}"
            )
        else:
            # For non-dict values, only report if they're different
            if actual_value != expected_value:
                # Format the values to be more readable
                actual_str = str(actual_value)
                expected_str = str(expected_value)
                if len(actual_str) > 50 or len(expected_str) > 50:
                    actual_str = f"{actual_str[:50]}..."
                    expected_str = f"{expected_str[:50]}..."
                differences.append(
                    f"Value mismatch at {current_path}:\n expected: {expected_str}\n got: {actual_str}"
                )
    return differences
tests/llm_responses_api_testing/base_responses_api.py (new file, 158 lines)
@@ -0,0 +1,158 @@

import httpx
import json
import pytest
import sys
from typing import Any, Dict, List
from unittest.mock import MagicMock, Mock, patch
import os
import uuid
import time
import base64

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from abc import ABC, abstractmethod

from litellm.integrations.custom_logger import CustomLogger
import json
from litellm.types.utils import StandardLoggingPayload
from litellm.types.llms.openai import (
    ResponseCompletedEvent,
    ResponsesAPIResponse,
    ResponseTextConfig,
    ResponseAPIUsage,
    IncompleteDetails,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler


def validate_responses_api_response(response, final_chunk: bool = False):
    """
    Validate that a response from litellm.responses() or litellm.aresponses()
    conforms to the expected ResponsesAPIResponse structure.

    Args:
        response: The response object to validate

    Raises:
        AssertionError: If the response doesn't match the expected structure
    """
    # Validate response structure
    print("response=", json.dumps(response, indent=4, default=str))
    assert isinstance(
        response, ResponsesAPIResponse
    ), "Response should be an instance of ResponsesAPIResponse"

    # Required fields
    assert "id" in response and isinstance(
        response["id"], str
    ), "Response should have a string 'id' field"
    assert "created_at" in response and isinstance(
        response["created_at"], (int, float)
    ), "Response should have a numeric 'created_at' field"
    assert "output" in response and isinstance(
        response["output"], list
    ), "Response should have a list 'output' field"
    assert "parallel_tool_calls" in response and isinstance(
        response["parallel_tool_calls"], bool
    ), "Response should have a boolean 'parallel_tool_calls' field"

    # Optional fields with their expected types
    optional_fields = {
        "error": (dict, type(None)),  # error can be dict or None
        "incomplete_details": (IncompleteDetails, type(None)),
        "instructions": (str, type(None)),
        "metadata": dict,
        "model": str,
        "object": str,
        "temperature": (int, float, type(None)),
        "tool_choice": (dict, str),
        "tools": list,
        "top_p": (int, float, type(None)),
        "max_output_tokens": (int, type(None)),
        "previous_response_id": (str, type(None)),
        "reasoning": dict,
        "status": str,
        "text": ResponseTextConfig,
        "truncation": (str, type(None)),
        "usage": ResponseAPIUsage,
        "user": (str, type(None)),
    }
    if final_chunk is False:
        optional_fields["usage"] = type(None)

    for field, expected_type in optional_fields.items():
        if field in response:
            assert isinstance(
                response[field], expected_type
            ), f"Field '{field}' should be of type {expected_type}, but got {type(response[field])}"

    # Check if output has at least one item
    if final_chunk is True:
        assert (
            len(response["output"]) > 0
        ), "Response 'output' field should have at least one item"

    return True  # Return True if validation passes


class BaseResponsesAPITest(ABC):
    """
    Abstract base test class that enforces a common test across all test classes.
    """
    @abstractmethod
    def get_base_completion_call_args(self) -> dict:
        """Must return the base completion call args"""
        pass


    @pytest.mark.parametrize("sync_mode", [True, False])
    @pytest.mark.asyncio
    async def test_basic_openai_responses_api(self, sync_mode):
        litellm._turn_on_debug()
        litellm.set_verbose = True
        base_completion_call_args = self.get_base_completion_call_args()
        if sync_mode:
            response = litellm.responses(
                input="Basic ping", max_output_tokens=20,
                **base_completion_call_args
            )
        else:
            response = await litellm.aresponses(
                input="Basic ping", max_output_tokens=20,
                **base_completion_call_args
            )

        print("litellm response=", json.dumps(response, indent=4, default=str))

        # Use the helper function to validate the response
        validate_responses_api_response(response, final_chunk=True)


    @pytest.mark.parametrize("sync_mode", [True])
    @pytest.mark.asyncio
    async def test_basic_openai_responses_api_streaming(self, sync_mode):
        litellm._turn_on_debug()
        base_completion_call_args = self.get_base_completion_call_args()
        if sync_mode:
            response = litellm.responses(
                input="Basic ping",
                stream=True,
                **base_completion_call_args
            )
            for event in response:
                print("litellm response=", json.dumps(event, indent=4, default=str))
        else:
            response = await litellm.aresponses(
                input="Basic ping",
                stream=True,
                **base_completion_call_args
            )
            async for event in response:
                print("litellm response=", json.dumps(event, indent=4, default=str))
@@ -0,0 +1,95 @@
import os
import sys
import pytest
import asyncio
from typing import Optional
from unittest.mock import patch, AsyncMock

sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.integrations.custom_logger import CustomLogger
import json
from litellm.types.utils import StandardLoggingPayload
from litellm.types.llms.openai import (
    ResponseCompletedEvent,
    ResponsesAPIResponse,
    ResponseTextConfig,
    ResponseAPIUsage,
    IncompleteDetails,
)
import litellm
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from base_responses_api import BaseResponsesAPITest
from openai.types.responses.function_tool import FunctionTool


class TestAnthropicResponsesAPITest(BaseResponsesAPITest):
    def get_base_completion_call_args(self):
        #litellm._turn_on_debug()
        return {
            "model": "anthropic/claude-3-5-sonnet-latest",
        }


def test_multiturn_tool_calls():
    # Test streaming response with tools for Anthropic
    litellm._turn_on_debug()
    shell_tool = dict(FunctionTool(
        type="function",
        name="shell",
        description="Runs a shell command, and returns its output.",
        parameters={
            "type": "object",
            "properties": {
                "command": {"type": "array", "items": {"type": "string"}},
                "workdir": {"type": "string", "description": "The working directory for the command."}
            },
            "required": ["command"]
        },
        strict=True
    ))


    # Step 1: Initial request with the tool
    response = litellm.responses(
        input=[{
            'role': 'user',
            'content': [
                {'type': 'input_text', 'text': 'make a hello world html file'}
            ],
            'type': 'message'
        }],
        model='anthropic/claude-3-7-sonnet-latest',
        instructions='You are a helpful coding assistant.',
        tools=[shell_tool]
    )

    print("response=", response)

    # Step 2: Send the results of the tool call back to the model
    # Get the response ID and tool call ID from the response

    response_id = response.id
    tool_call_id = ""
    for item in response.output:
        if 'type' in item and item['type'] == 'function_call':
            tool_call_id = item['call_id']
            break

    # Use await with asyncio.run for the async function
    follow_up_response = litellm.responses(
        model='anthropic/claude-3-7-sonnet-latest',
        previous_response_id=response_id,
        input=[{
            'type': 'function_call_output',
            'call_id': tool_call_id,
            'output': '{"output":"<html>\\n<head>\\n <title>Hello Page</title>\\n</head>\\n<body>\\n <h1>Hi</h1>\\n <p>Welcome to this simple webpage!</p>\\n</body>\\n</html> > index.html\\n","metadata":{"exit_code":0,"duration_seconds":0}}'
        }],
        tools=[shell_tool]
    )

    print("follow_up_response=", follow_up_response)
tests/llm_responses_api_testing/test_azure_responses_api.py (new file, 31 lines)
@@ -0,0 +1,31 @@
import os
import sys
import pytest
import asyncio
from typing import Optional
from unittest.mock import patch, AsyncMock

sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.integrations.custom_logger import CustomLogger
import json
from litellm.types.utils import StandardLoggingPayload
from litellm.types.llms.openai import (
    ResponseCompletedEvent,
    ResponsesAPIResponse,
    ResponseTextConfig,
    ResponseAPIUsage,
    IncompleteDetails,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from base_responses_api import BaseResponsesAPITest

class TestAzureResponsesAPITest(BaseResponsesAPITest):
    def get_base_completion_call_args(self):
        return {
            "model": "azure/computer-use-preview",
            "truncation": "auto",
            "api_base": os.getenv("AZURE_RESPONSES_OPENAI_ENDPOINT"),
            "api_key": os.getenv("AZURE_RESPONSES_OPENAI_API_KEY"),
            "api_version": os.getenv("AZURE_RESPONSES_OPENAI_API_VERSION"),
        }
@@ -18,119 +18,13 @@ from litellm.types.llms.openai import (
    IncompleteDetails,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from base_responses_api import BaseResponsesAPITest, validate_responses_api_response


def validate_responses_api_response(response, final_chunk: bool = False):
    """
    Validate that a response from litellm.responses() or litellm.aresponses()
    conforms to the expected ResponsesAPIResponse structure.

    Args:
        response: The response object to validate

    Raises:
        AssertionError: If the response doesn't match the expected structure
    """
    # Validate response structure
    print("response=", json.dumps(response, indent=4, default=str))
    assert isinstance(
        response, ResponsesAPIResponse
    ), "Response should be an instance of ResponsesAPIResponse"

    # Required fields
    assert "id" in response and isinstance(
        response["id"], str
    ), "Response should have a string 'id' field"
    assert "created_at" in response and isinstance(
        response["created_at"], (int, float)
    ), "Response should have a numeric 'created_at' field"
    assert "output" in response and isinstance(
        response["output"], list
    ), "Response should have a list 'output' field"
    assert "parallel_tool_calls" in response and isinstance(
        response["parallel_tool_calls"], bool
    ), "Response should have a boolean 'parallel_tool_calls' field"

    # Optional fields with their expected types
    optional_fields = {
        "error": (dict, type(None)),  # error can be dict or None
        "incomplete_details": (IncompleteDetails, type(None)),
        "instructions": (str, type(None)),
        "metadata": dict,
        "model": str,
        "object": str,
        "temperature": (int, float),
        "tool_choice": (dict, str),
        "tools": list,
        "top_p": (int, float),
        "max_output_tokens": (int, type(None)),
        "previous_response_id": (str, type(None)),
        "reasoning": dict,
        "status": str,
        "text": ResponseTextConfig,
        "truncation": str,
        "usage": ResponseAPIUsage,
        "user": (str, type(None)),
    }
    if final_chunk is False:
        optional_fields["usage"] = type(None)

    for field, expected_type in optional_fields.items():
        if field in response:
            assert isinstance(
                response[field], expected_type
            ), f"Field '{field}' should be of type {expected_type}, but got {type(response[field])}"

    # Check if output has at least one item
    if final_chunk is True:
        assert (
            len(response["output"]) > 0
        ), "Response 'output' field should have at least one item"

    return True  # Return True if validation passes


@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_basic_openai_responses_api(sync_mode):
    litellm._turn_on_debug()
    litellm.set_verbose = True
    if sync_mode:
        response = litellm.responses(
            model="gpt-4o", input="Basic ping", max_output_tokens=20
        )
    else:
        response = await litellm.aresponses(
            model="gpt-4o", input="Basic ping", max_output_tokens=20
        )

    print("litellm response=", json.dumps(response, indent=4, default=str))

    # Use the helper function to validate the response
    validate_responses_api_response(response, final_chunk=True)


@pytest.mark.parametrize("sync_mode", [True])
@pytest.mark.asyncio
async def test_basic_openai_responses_api_streaming(sync_mode):
    litellm._turn_on_debug()

    if sync_mode:
        response = litellm.responses(
            model="gpt-4o",
            input="Basic ping",
            stream=True,
        )
        for event in response:
            print("litellm response=", json.dumps(event, indent=4, default=str))
    else:
        response = await litellm.aresponses(
            model="gpt-4o",
            input="Basic ping",
            stream=True,
        )
        async for event in response:
            print("litellm response=", json.dumps(event, indent=4, default=str))
class TestOpenAIResponsesAPITest(BaseResponsesAPITest):
    def get_base_completion_call_args(self):
        return {
            "model": "openai/gpt-4o",
        }


class TestCustomLogger(CustomLogger):
@@ -693,7 +587,7 @@ async def test_openai_responses_litellm_router_no_metadata():

        # Assert metadata is not in the request
        assert (
-            loaded_request_body["metadata"] == None
+            "metadata" not in loaded_request_body
        ), "metadata should not be in the request body"
        mock_post.assert_called_once()
@@ -76,6 +76,11 @@ class BaseLLMChatTest(ABC):
        """Must return the base completion call args"""
        pass

    def get_base_completion_call_args_with_reasoning_model(self) -> dict:
        """Must return the base completion call args with reasoning_effort"""
        return {}

    def test_developer_role_translation(self):
        """
        Test that the developer role is translated correctly for non-OpenAI providers.

@@ -1126,6 +1131,46 @@ class BaseLLMChatTest(ABC):

        print(response)

    def test_reasoning_effort(self):
        """Test that reasoning_effort is passed correctly to the model"""
        from litellm.utils import supports_reasoning
        from litellm import completion

        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
        litellm.model_cost = litellm.get_model_cost_map(url="")

        base_completion_call_args = self.get_base_completion_call_args_with_reasoning_model()
        if len(base_completion_call_args) == 0:
            print("base_completion_call_args is empty")
            pytest.skip("Model does not support reasoning")
        if not supports_reasoning(base_completion_call_args["model"], None):
            print("Model does not support reasoning")
            pytest.skip("Model does not support reasoning")

        _, provider, _, _ = litellm.get_llm_provider(
            model=base_completion_call_args["model"]
        )

        ## CHECK PARAM MAPPING
        optional_params = get_optional_params(
            model=base_completion_call_args["model"],
            custom_llm_provider=provider,
            reasoning_effort="high",
        )
        # either accepts reasoning effort or thinking budget
        assert "reasoning_effort" in optional_params or "4096" in json.dumps(optional_params)

        try:
            litellm._turn_on_debug()
            response = completion(
                **base_completion_call_args,
                reasoning_effort="low",
                messages=[{"role": "user", "content": "Hello!"}],
            )
            print(f"response: {response}")
        except Exception as e:
            pytest.fail(f"Error: {e}")


class BaseOSeriesModelsTest(ABC):  # test across azure/openai
Some files were not shown because too many files have changed in this diff.