Merge branch 'main' into litellm_add_managed_files_db

Krish Dholakia 2025-04-12 07:49:33 -07:00 committed by GitHub
commit 37b4f5f5c9
59 changed files with 2902 additions and 361 deletions

View file

@ -18,7 +18,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.4.2
version: 0.4.3
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to

View file

@ -97,6 +97,9 @@ spec:
value: {{ $val | quote }}
{{- end }}
{{- end }}
{{- with .Values.extraEnvVars }}
{{- toYaml . | nindent 12 }}
{{- end }}
envFrom:
{{- range .Values.environmentSecrets }}
- secretRef:

View file

@ -80,3 +80,38 @@ tests:
secretKeyRef:
name: my-secret
key: my-key
- it: should work with extraEnvVars
template: deployment.yaml
set:
extraEnvVars:
- name: EXTRA_ENV_VAR
valueFrom:
fieldRef:
fieldPath: metadata.labels['env']
asserts:
- contains:
path: spec.template.spec.containers[0].env
content:
name: EXTRA_ENV_VAR
valueFrom:
fieldRef:
fieldPath: metadata.labels['env']
- it: should work with both extraEnvVars and envVars
template: deployment.yaml
set:
envVars:
ENV_VAR: ENV_VAR_VALUE
extraEnvVars:
- name: EXTRA_ENV_VAR
value: EXTRA_ENV_VAR_VALUE
asserts:
- contains:
path: spec.template.spec.containers[0].env
content:
name: ENV_VAR
value: ENV_VAR_VALUE
- contains:
path: spec.template.spec.containers[0].env
content:
name: EXTRA_ENV_VAR
value: EXTRA_ENV_VAR_VALUE

View file

@ -195,9 +195,15 @@ migrationJob:
annotations: {}
ttlSecondsAfterFinished: 120
# Additional environment variables to be added to the deployment
# Additional environment variables to be added to the deployment as a map of key-value pairs
envVars: {
# USE_DDTRACE: "true"
}
# Additional environment variables to be added to the deployment as a list of k8s env vars
extraEnvVars: [
# - name: EXTRA_ENV_VAR
#   value: EXTRA_ENV_VAR_VALUE
]
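Taken together, the two values cover different shapes of environment configuration: `envVars` is a simple map, while `extraEnvVars` is rendered verbatim, so full Kubernetes env entries (e.g. `valueFrom`) are possible. A hedged sketch of a values override combining both, mirroring the unit tests above (names and values are illustrative):
```yaml
envVars:
  USE_DDTRACE: "true"            # simple name/value pair
extraEnvVars:
  - name: EXTRA_ENV_VAR          # full k8s env entry, rendered as-is
    valueFrom:
      fieldRef:
        fieldPath: metadata.labels['env']
```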

View file

@ -478,7 +478,7 @@ response.stream_to_file(speech_file_path)
## **Authentication**
### Entrata ID - use `azure_ad_token`
### Entra ID - use `azure_ad_token`
This is a walkthrough on how to use Azure Active Directory Tokens - Microsoft Entra ID to make `litellm.completion()` calls
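For instance, a minimal sketch of passing an Entra ID access token directly (the deployment name, endpoint, and token below are placeholders; obtaining the token itself is out of scope here):
```python
import litellm

# placeholders: deployment name, endpoint, API version, and the Entra ID token
response = litellm.completion(
    model="azure/my-gpt-4o-deployment",
    api_base="https://my-endpoint.openai.azure.com/",
    api_version="2024-02-15-preview",
    azure_ad_token="eyJ0eXAiOiJKV1QiLCJhbGciOi...",  # token issued by Microsoft Entra ID
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)
```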
@ -545,7 +545,7 @@ model_list:
</TabItem>
</Tabs>
### Entrata ID - use tenant_id, client_id, client_secret
### Entra ID - use tenant_id, client_id, client_secret
Here is an example of setting up `tenant_id`, `client_id`, `client_secret` in your litellm proxy `config.yaml`
```yaml
@ -581,7 +581,7 @@ Example video of using `tenant_id`, `client_id`, `client_secret` with LiteLLM Pr
<iframe width="840" height="500" src="https://www.loom.com/embed/70d3f219ee7f4e5d84778b7f17bba506?sid=04b8ff29-485f-4cb8-929e-6b392722f36d" frameborder="0" webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>
### Entrata ID - use client_id, username, password
### Entra ID - use client_id, username, password
Here is an example of setting up `client_id`, `azure_username`, `azure_password` in your litellm proxy `config.yaml`
```yaml

View file

@ -18,13 +18,14 @@ os.environ['XAI_API_KEY']
```
## Sample Usage
```python
```python showLineNumbers title="LiteLLM python sdk usage - Non-streaming"
from litellm import completion
import os
os.environ['XAI_API_KEY'] = ""
response = completion(
model="xai/grok-2-latest",
model="xai/grok-3-mini-beta",
messages=[
{
"role": "user",
@ -45,13 +46,14 @@ print(response)
```
## Sample Usage - Streaming
```python
```python showLineNumbers title="LiteLLM python sdk usage - Streaming"
from litellm import completion
import os
os.environ['XAI_API_KEY'] = ""
response = completion(
model="xai/grok-2-latest",
model="xai/grok-3-mini-beta",
messages=[
{
"role": "user",
@ -75,7 +77,8 @@ for chunk in response:
```
## Sample Usage - Vision
```python
```python showLineNumbers title="LiteLLM python sdk usage - Vision"
import os
from litellm import completion
@ -110,7 +113,7 @@ Here's how to call a XAI model with the LiteLLM Proxy Server
1. Modify the config.yaml
```yaml
```yaml showLineNumbers
model_list:
- model_name: my-model
litellm_params:
@ -131,7 +134,7 @@ Here's how to call a XAI model with the LiteLLM Proxy Server
<TabItem value="openai" label="OpenAI Python v1.0.0+">
```python
```python showLineNumbers
import openai
client = openai.OpenAI(
api_key="sk-1234", # pass litellm proxy key, if you're using virtual keys

View file

@ -15,10 +15,11 @@ Supported Providers:
- Bedrock (Anthropic + Deepseek) (`bedrock/`)
- Vertex AI (Anthropic) (`vertexai/`)
- OpenRouter (`openrouter/`)
- XAI (`xai/`)
LiteLLM will standardize the `reasoning_content` in the response and `thinking_blocks` in the assistant message.
```python
```python title="Example response from litellm"
"message": {
...
"reasoning_content": "The capital of France is Paris.",
@ -37,7 +38,7 @@ LiteLLM will standardize the `reasoning_content` in the response and `thinking_b
<Tabs>
<TabItem value="sdk" label="SDK">
```python
```python showLineNumbers
from litellm import completion
import os
@ -111,7 +112,7 @@ Here's how to use `thinking` blocks by Anthropic with tool calling.
<Tabs>
<TabItem value="sdk" label="SDK">
```python
```python showLineNumbers
litellm._turn_on_debug()
litellm.modify_params = True
model = "anthropic/claude-3-7-sonnet-20250219" # works across Anthropic, Bedrock, Vertex AI
@ -210,7 +211,7 @@ if tool_calls:
1. Setup config.yaml
```yaml
```yaml showLineNumbers
model_list:
- model_name: claude-3-7-sonnet-thinking
litellm_params:
@ -224,7 +225,7 @@ model_list:
2. Run proxy
```bash
```bash showLineNumbers
litellm --config config.yaml
# RUNNING on http://0.0.0.0:4000
@ -332,7 +333,7 @@ curl http://0.0.0.0:4000/v1/chat/completions \
Set `drop_params=True` to drop the 'thinking' blocks when swapping from Anthropic to Deepseek models. Suggest improvements to this approach [here](https://github.com/BerriAI/litellm/discussions/8927).
```python
```python showLineNumbers
litellm.drop_params = True # 👈 EITHER GLOBALLY or per request
# or per request
@ -373,7 +374,7 @@ You can also pass the `thinking` parameter to Anthropic models.
<Tabs>
<TabItem value="sdk" label="SDK">
```python
```python showLineNumbers
response = litellm.completion(
model="anthropic/claude-3-7-sonnet-20250219",
messages=[{"role": "user", "content": "What is the capital of France?"}],
@ -395,5 +396,92 @@ curl http://0.0.0.0:4000/v1/chat/completions \
}'
```
</TabItem>
</Tabs>
## Checking if a model supports reasoning
<Tabs>
<TabItem label="LiteLLM Python SDK" value="Python">
Use `litellm.supports_reasoning(model="")` -> returns `True` if model supports reasoning and `False` if not.
```python showLineNumbers title="litellm.supports_reasoning() usage"
import litellm
# Example models that support reasoning
assert litellm.supports_reasoning(model="anthropic/claude-3-7-sonnet-20250219") == True
assert litellm.supports_reasoning(model="deepseek/deepseek-chat") == True
# Example models that do not support reasoning
assert litellm.supports_reasoning(model="openai/gpt-3.5-turbo") == False
```
</TabItem>
<TabItem label="LiteLLM Proxy Server" value="proxy">
1. Define models that support reasoning in your `config.yaml`. You can optionally add `supports_reasoning: True` to the `model_info` if LiteLLM does not automatically detect it for your custom model.
```yaml showLineNumbers title="litellm proxy config.yaml"
model_list:
- model_name: claude-3-sonnet-reasoning
litellm_params:
model: anthropic/claude-3-7-sonnet-20250219
api_key: os.environ/ANTHROPIC_API_KEY
- model_name: deepseek-reasoning
litellm_params:
model: deepseek/deepseek-chat
api_key: os.environ/DEEPSEEK_API_KEY
# Example for a custom model where detection might be needed
- model_name: my-custom-reasoning-model
litellm_params:
model: openai/my-custom-model # Assuming it's OpenAI compatible
api_base: http://localhost:8000
api_key: fake-key
model_info:
supports_reasoning: True # Explicitly mark as supporting reasoning
```
2. Run the proxy server:
```bash showLineNumbers title="litellm --config config.yaml"
litellm --config config.yaml
```
3. Call `/model_group/info` to check if your model supports `reasoning`
```shell showLineNumbers title="curl /model_group/info"
curl -X 'GET' \
'http://localhost:4000/model_group/info' \
-H 'accept: application/json' \
-H 'x-api-key: sk-1234'
```
Expected Response
```json showLineNumbers title="response from /model_group/info"
{
"data": [
{
"model_group": "claude-3-sonnet-reasoning",
"providers": ["anthropic"],
"mode": "chat",
"supports_reasoning": true,
},
{
"model_group": "deepseek-reasoning",
"providers": ["deepseek"],
"supports_reasoning": true,
},
{
"model_group": "my-custom-reasoning-model",
"providers": ["openai"],
"supports_reasoning": true,
}
]
}
```
</TabItem>
</Tabs>

View file

@ -2148,9 +2148,10 @@
}
},
"node_modules/@babel/runtime": {
"version": "7.26.0",
"resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.26.0.tgz",
"integrity": "sha512-FDSOghenHTiToteC/QRlv2q3DhPZ/oOXTBoirfWNx1Cx3TMVcGWQtMMmQcSvb/JjpNeGzx8Pq/b4fKEJuWm1sw==",
"version": "7.27.0",
"resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.27.0.tgz",
"integrity": "sha512-VtPOkrdPHZsKc/clNqyi9WUA8TINkZ4cGk63UUE3u4pmB2k+ZMQRDuIOagv8UVd6j7k0T3+RRIb7beKTebNbcw==",
"license": "MIT",
"dependencies": {
"regenerator-runtime": "^0.14.0"
},

View file

@ -38,7 +38,7 @@ hide_table_of_contents: false
2. OpenAI Moderations - `omni-moderation-latest` support. [Start Here](https://docs.litellm.ai/docs/moderation)
3. Azure O1 - fake streaming support. This ensures if a `stream=true` is passed, the response is streamed. [Start Here](https://docs.litellm.ai/docs/providers/azure)
4. Anthropic - non-whitespace char stop sequence handling - [PR](https://github.com/BerriAI/litellm/pull/7484)
5. Azure OpenAI - support entrata id username + password based auth. [Start Here](https://docs.litellm.ai/docs/providers/azure#entrata-id---use-tenant_id-client_id-client_secret)
5. Azure OpenAI - support Entra id username + password based auth. [Start Here](https://docs.litellm.ai/docs/providers/azure#entrata-id---use-tenant_id-client_id-client_secret)
6. LM Studio - embedding route support. [Start Here](https://docs.litellm.ai/docs/providers/lm-studio)
7. WatsonX - ZenAPIKeyAuth support. [Start Here](https://docs.litellm.ai/docs/providers/watsonx)

View file

@ -65,8 +65,8 @@ from litellm.proxy._types import (
KeyManagementSystem,
KeyManagementSettings,
LiteLLM_UpperboundKeyGenerateParams,
NewTeamRequest,
)
from litellm.types.proxy.management_endpoints.ui_sso import DefaultTeamSSOParams
from litellm.types.utils import StandardKeyGenerationConfig, LlmProviders
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.logging_callback_manager import LoggingCallbackManager
@ -277,7 +277,7 @@ default_key_generate_params: Optional[Dict] = None
upperbound_key_generate_params: Optional[LiteLLM_UpperboundKeyGenerateParams] = None
key_generation_settings: Optional[StandardKeyGenerationConfig] = None
default_internal_user_params: Optional[Dict] = None
default_team_params: Optional[Union[NewTeamRequest, Dict]] = None
default_team_params: Optional[Union[DefaultTeamSSOParams, Dict]] = None
default_team_settings: Optional[List] = None
max_user_budget: Optional[float] = None
default_max_internal_user_budget: Optional[float] = None
@ -772,6 +772,7 @@ from .utils import (
supports_audio_input,
supports_audio_output,
supports_system_messages,
supports_reasoning,
get_litellm_params,
acreate,
get_max_tokens,

View file

@ -68,7 +68,7 @@ class DatabricksBase:
headers: Optional[dict],
) -> Tuple[str, dict]:
if api_key is None and not headers: # handle empty headers
if custom_endpoint is not None:
if custom_endpoint is True:
raise DatabricksException(
status_code=400,
message="Missing API Key - A call is being made to LLM Provider but no key is set either in the environment variables ({LLM_PROVIDER}_API_KEY) or via params",

View file

@ -15,6 +15,21 @@ class LiteLLMProxyChatConfig(OpenAIGPTConfig):
list.append("thinking")
return list
def _map_openai_params(
self,
non_default_params: dict,
optional_params: dict,
model: str,
drop_params: bool,
) -> dict:
supported_openai_params = self.get_supported_openai_params(model)
for param, value in non_default_params.items():
if param == "thinking":
optional_params.setdefault("extra_body", {})["thinking"] = value
elif param in supported_openai_params:
optional_params[param] = value
return optional_params
def _get_openai_compatible_provider_info(
self, api_base: Optional[str], api_key: Optional[str]
) -> Tuple[Optional[str], Optional[str]]:
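A hedged sketch of what the new `_map_openai_params` does, assuming the class above is in scope and that `temperature` is among its supported OpenAI params (model name and values are illustrative):
```python
cfg = LiteLLMProxyChatConfig()  # class defined above

optional_params = cfg._map_openai_params(
    non_default_params={
        "thinking": {"type": "enabled", "budget_tokens": 1024},
        "temperature": 0.2,
    },
    optional_params={},
    model="litellm_proxy/claude-3-7-sonnet",  # placeholder model name
    drop_params=False,
)
# "thinking" is tunneled through extra_body, supported params pass straight through:
# {"extra_body": {"thinking": {"type": "enabled", "budget_tokens": 1024}}, "temperature": 0.2}
```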

View file

@ -1,5 +1,7 @@
from typing import List, Optional, Tuple
import litellm
from litellm._logging import verbose_logger
from litellm.litellm_core_utils.prompt_templates.common_utils import (
strip_name_from_messages,
)
@ -12,6 +14,10 @@ XAI_API_BASE = "https://api.x.ai/v1"
class XAIChatConfig(OpenAIGPTConfig):
@property
def custom_llm_provider(self) -> Optional[str]:
return "xai"
def _get_openai_compatible_provider_info(
self, api_base: Optional[str], api_key: Optional[str]
) -> Tuple[Optional[str], Optional[str]]:
@ -20,7 +26,7 @@ class XAIChatConfig(OpenAIGPTConfig):
return api_base, dynamic_api_key
def get_supported_openai_params(self, model: str) -> list:
return [
base_openai_params = [
"frequency_penalty",
"logit_bias",
"logprobs",
@ -39,6 +45,15 @@ class XAIChatConfig(OpenAIGPTConfig):
"top_p",
"user",
]
try:
if litellm.supports_reasoning(
model=model, custom_llm_provider=self.custom_llm_provider
):
base_openai_params.append("reasoning_effort")
except Exception as e:
verbose_logger.debug(f"Error checking if model supports reasoning: {e}")
return base_openai_params
def map_openai_params(
self,
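With this change, `reasoning_effort` is only advertised as a supported param for reasoning-capable Grok models. A hedged usage sketch (the API key and effort value are illustrative):
```python
import os
from litellm import completion

os.environ["XAI_API_KEY"] = ""  # placeholder

response = completion(
    model="xai/grok-3-mini-beta",  # supports_reasoning is true for this model (see pricing map below)
    messages=[{"role": "user", "content": "What is 17 * 24?"}],
    reasoning_effort="low",        # accepted because the model supports reasoning
)
print(response.choices[0].message.content)
```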

View file

@ -15,6 +15,7 @@
"supports_prompt_caching": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_reasoning": true,
"supports_web_search": true,
"search_context_cost_per_query": {
"search_context_size_low": 0.0000,
@ -379,6 +380,7 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_native_streaming": false,
"supports_reasoning": true,
"supported_modalities": ["text", "image"],
"supported_output_modalities": ["text"],
"supported_endpoints": ["/v1/responses", "/v1/batch"]
@ -401,6 +403,7 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_native_streaming": false,
"supports_reasoning": true,
"supported_modalities": ["text", "image"],
"supported_output_modalities": ["text"],
"supported_endpoints": ["/v1/responses", "/v1/batch"]
@ -420,6 +423,7 @@
"supports_prompt_caching": true,
"supports_system_messages": true,
"supports_response_schema": true,
"supports_reasoning": true,
"supports_tool_choice": true
},
"o1-mini": {
@ -448,6 +452,7 @@
"supports_vision": false,
"supports_prompt_caching": true,
"supports_response_schema": true,
"supports_reasoning": true,
"supports_tool_choice": true
},
"o3-mini-2025-01-31": {
@ -464,6 +469,7 @@
"supports_vision": false,
"supports_prompt_caching": true,
"supports_response_schema": true,
"supports_reasoning": true,
"supports_tool_choice": true
},
"o1-mini-2024-09-12": {
@ -476,6 +482,7 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_vision": true,
"supports_reasoning": true,
"supports_prompt_caching": true
},
"o1-preview": {
@ -488,6 +495,7 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_vision": true,
"supports_reasoning": true,
"supports_prompt_caching": true
},
"o1-preview-2024-09-12": {
@ -500,6 +508,7 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_vision": true,
"supports_reasoning": true,
"supports_prompt_caching": true
},
"o1-2024-12-17": {
@ -517,6 +526,7 @@
"supports_prompt_caching": true,
"supports_system_messages": true,
"supports_response_schema": true,
"supports_reasoning": true,
"supports_tool_choice": true
},
"chatgpt-4o-latest": {
@ -1416,6 +1426,7 @@
"cache_read_input_token_cost": 0.00000055,
"litellm_provider": "azure",
"mode": "chat",
"supports_reasoning": true,
"supports_vision": false,
"supports_prompt_caching": true,
"supports_tool_choice": true
@ -1432,6 +1443,7 @@
"litellm_provider": "azure",
"mode": "chat",
"supports_vision": false,
"supports_reasoning": true,
"supports_prompt_caching": true,
"supports_tool_choice": true
},
@ -1447,6 +1459,7 @@
"litellm_provider": "azure",
"mode": "chat",
"supports_vision": false,
"supports_reasoning": true,
"supports_prompt_caching": true,
"supports_tool_choice": true
},
@ -1477,6 +1490,7 @@
"mode": "chat",
"supports_vision": false,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_tool_choice": true
},
@ -1492,6 +1506,7 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_reasoning": true,
"supports_prompt_caching": true
},
"azure/o1-mini-2024-09-12": {
@ -1506,6 +1521,7 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_reasoning": true,
"supports_prompt_caching": true
},
"azure/us/o1-mini-2024-09-12": {
@ -1552,6 +1568,7 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true,
"supports_reasoning": true,
"supports_prompt_caching": true,
"supports_tool_choice": true
},
@ -1567,6 +1584,7 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true,
"supports_reasoning": true,
"supports_prompt_caching": true,
"supports_tool_choice": true
},
@ -1612,6 +1630,7 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_reasoning": true,
"supports_prompt_caching": true
},
"azure/o1-preview-2024-09-12": {
@ -1626,6 +1645,7 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_reasoning": true,
"supports_prompt_caching": true
},
"azure/us/o1-preview-2024-09-12": {
@ -2284,6 +2304,7 @@
"litellm_provider": "azure_ai",
"mode": "chat",
"supports_tool_choice": true,
"supports_reasoning": true,
"source": "https://techcommunity.microsoft.com/blog/machinelearningblog/deepseek-r1-improved-performance-higher-limits-and-transparent-pricing/4386367"
},
"azure_ai/deepseek-v3": {
@ -2984,6 +3005,7 @@
"supports_function_calling": true,
"supports_assistant_prefill": true,
"supports_tool_choice": true,
"supports_reasoning": true,
"supports_prompt_caching": true
},
"deepseek/deepseek-chat": {
@ -3097,6 +3119,87 @@
"supports_vision": true,
"supports_tool_choice": true
},
"xai/grok-3-beta": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015,
"litellm_provider": "xai",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": true,
"supports_response_schema": false,
"source": "https://x.ai/api#pricing"
},
"xai/grok-3-fast-beta": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000025,
"litellm_provider": "xai",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": true,
"supports_response_schema": false,
"source": "https://x.ai/api#pricing"
},
"xai/grok-3-fast-latest": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000025,
"litellm_provider": "xai",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": true,
"supports_response_schema": false,
"source": "https://x.ai/api#pricing"
},
"xai/grok-3-mini-beta": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.0000003,
"output_cost_per_token": 0.0000005,
"litellm_provider": "xai",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": true,
"supports_reasoning": true,
"supports_response_schema": false,
"source": "https://x.ai/api#pricing"
},
"xai/grok-3-mini-fast-beta": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.0000006,
"output_cost_per_token": 0.000004,
"litellm_provider": "xai",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": true,
"supports_reasoning": true,
"supports_response_schema": false,
"source": "https://x.ai/api#pricing"
},
"xai/grok-3-mini-fast-latest": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.0000006,
"output_cost_per_token": 0.000004,
"litellm_provider": "xai",
"mode": "chat",
"supports_reasoning": true,
"supports_function_calling": true,
"supports_tool_choice": true,
"supports_response_schema": false,
"source": "https://x.ai/api#pricing"
},
"xai/grok-vision-beta": {
"max_tokens": 8192,
"max_input_tokens": 8192,
@ -3167,6 +3270,7 @@
"mode": "chat",
"supports_system_messages": false,
"supports_function_calling": false,
"supports_reasoning": true,
"supports_response_schema": false,
"supports_tool_choice": true
},
@ -3678,7 +3782,8 @@
"supports_prompt_caching": true,
"supports_response_schema": true,
"deprecation_date": "2025-06-01",
"supports_tool_choice": true
"supports_tool_choice": true,
"supports_reasoning": true
},
"claude-3-7-sonnet-20250219": {
"max_tokens": 128000,
@ -3698,7 +3803,8 @@
"supports_prompt_caching": true,
"supports_response_schema": true,
"deprecation_date": "2026-02-01",
"supports_tool_choice": true
"supports_tool_choice": true,
"supports_reasoning": true
},
"claude-3-5-sonnet-20241022": {
"max_tokens": 8192,
@ -5276,6 +5382,7 @@
"supports_prompt_caching": true,
"supports_response_schema": true,
"deprecation_date": "2025-06-01",
"supports_reasoning": true,
"supports_tool_choice": true
},
"vertex_ai/claude-3-haiku": {
@ -6593,6 +6700,7 @@
"mode": "chat",
"supports_function_calling": true,
"supports_assistant_prefill": true,
"supports_reasoning": true,
"supports_tool_choice": true,
"supports_prompt_caching": true
},
@ -6768,6 +6876,7 @@
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"supports_reasoning": true,
"tool_use_system_prompt_tokens": 159,
"supports_assistant_prefill": true,
"supports_tool_choice": true
@ -6783,6 +6892,7 @@
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"supports_reasoning": true,
"tool_use_system_prompt_tokens": 159,
"supports_tool_choice": true
},
@ -6950,6 +7060,7 @@
"litellm_provider": "openrouter",
"mode": "chat",
"supports_function_calling": true,
"supports_reasoning": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_tool_choice": true
@ -6963,6 +7074,7 @@
"litellm_provider": "openrouter",
"mode": "chat",
"supports_function_calling": true,
"supports_reasoning": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_tool_choice": true
@ -7780,6 +7892,7 @@
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
"supports_reasoning": true,
"supports_tool_choice": true
},
"anthropic.claude-3-5-sonnet-20241022-v2:0": {
@ -7897,7 +8010,8 @@
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
"supports_tool_choice": true
"supports_tool_choice": true,
"supports_reasoning": true
},
"us.anthropic.claude-3-haiku-20240307-v1:0": {
"max_tokens": 4096,
@ -8656,6 +8770,7 @@
"output_cost_per_token": 0.0000054,
"litellm_provider": "bedrock_converse",
"mode": "chat",
"supports_reasoning": true,
"supports_function_calling": false,
"supports_tool_choice": false
@ -10482,7 +10597,8 @@
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Claude 3.7 conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_tool_choice": true
"supports_tool_choice": true,
"supports_reasoning": true
},
"databricks/databricks-meta-llama-3-1-405b-instruct": {
"max_tokens": 128000,
@ -10740,6 +10856,7 @@
"max_input_tokens": 32768,
"max_output_tokens": 8192,
"litellm_provider": "snowflake",
"supports_reasoning": true,
"mode": "chat"
},
"snowflake/snowflake-arctic": {

View file

@ -192,6 +192,28 @@ class LiteLLM_UpperboundKeyGenerateParams(LiteLLMPydanticObjectBase):
rpm_limit: Optional[int] = None
class KeyManagementRoutes(str, enum.Enum):
"""
Enum for key management routes
"""
# write routes
KEY_GENERATE = "/key/generate"
KEY_UPDATE = "/key/update"
KEY_DELETE = "/key/delete"
KEY_REGENERATE = "/key/regenerate"
KEY_REGENERATE_WITH_PATH_PARAM = "/key/{key_id}/regenerate"
KEY_BLOCK = "/key/block"
KEY_UNBLOCK = "/key/unblock"
# info and health routes
KEY_INFO = "/key/info"
KEY_HEALTH = "/key/health"
# list routes
KEY_LIST = "/key/list"
class LiteLLMRoutes(enum.Enum):
openai_route_names = [
"chat_completion",
@ -322,14 +344,19 @@ class LiteLLMRoutes(enum.Enum):
# NOTE: ROUTES ONLY FOR MASTER KEY - only the Master Key should be able to Reset Spend
master_key_only_routes = ["/global/spend/reset"]
management_routes = [ # key
"/key/generate",
"/key/{token_id}/regenerate",
"/key/update",
"/key/delete",
"/key/info",
"/key/health",
"/key/list",
key_management_routes = [
KeyManagementRoutes.KEY_GENERATE,
KeyManagementRoutes.KEY_UPDATE,
KeyManagementRoutes.KEY_DELETE,
KeyManagementRoutes.KEY_INFO,
KeyManagementRoutes.KEY_REGENERATE,
KeyManagementRoutes.KEY_REGENERATE_WITH_PATH_PARAM,
KeyManagementRoutes.KEY_LIST,
KeyManagementRoutes.KEY_BLOCK,
KeyManagementRoutes.KEY_UNBLOCK,
]
management_routes = [
# user
"/user/new",
"/user/update",
@ -349,7 +376,7 @@ class LiteLLMRoutes(enum.Enum):
"/model/update",
"/model/delete",
"/model/info",
]
] + key_management_routes
spend_tracking_routes = [
# spend
@ -619,9 +646,9 @@ class GenerateRequestBase(LiteLLMPydanticObjectBase):
allowed_cache_controls: Optional[list] = []
config: Optional[dict] = {}
permissions: Optional[dict] = {}
model_max_budget: Optional[
dict
] = {} # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
model_max_budget: Optional[dict] = (
{}
) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
model_config = ConfigDict(protected_namespaces=())
model_rpm_limit: Optional[dict] = None
@ -877,12 +904,12 @@ class NewCustomerRequest(BudgetNewRequest):
alias: Optional[str] = None # human-friendly alias
blocked: bool = False # allow/disallow requests for this end-user
budget_id: Optional[str] = None # give either a budget_id or max_budget
allowed_model_region: Optional[
AllowedModelRegion
] = None # require all user requests to use models in this specific region
default_model: Optional[
str
] = None # if no equivalent model in allowed region - default all requests to this model
allowed_model_region: Optional[AllowedModelRegion] = (
None # require all user requests to use models in this specific region
)
default_model: Optional[str] = (
None # if no equivalent model in allowed region - default all requests to this model
)
@model_validator(mode="before")
@classmethod
@ -904,12 +931,12 @@ class UpdateCustomerRequest(LiteLLMPydanticObjectBase):
blocked: bool = False # allow/disallow requests for this end-user
max_budget: Optional[float] = None
budget_id: Optional[str] = None # give either a budget_id or max_budget
allowed_model_region: Optional[
AllowedModelRegion
] = None # require all user requests to use models in this specific region
default_model: Optional[
str
] = None # if no equivalent model in allowed region - default all requests to this model
allowed_model_region: Optional[AllowedModelRegion] = (
None # require all user requests to use models in this specific region
)
default_model: Optional[str] = (
None # if no equivalent model in allowed region - default all requests to this model
)
class DeleteCustomerRequest(LiteLLMPydanticObjectBase):
@ -1044,9 +1071,9 @@ class BlockKeyRequest(LiteLLMPydanticObjectBase):
class AddTeamCallback(LiteLLMPydanticObjectBase):
callback_name: str
callback_type: Optional[
Literal["success", "failure", "success_and_failure"]
] = "success_and_failure"
callback_type: Optional[Literal["success", "failure", "success_and_failure"]] = (
"success_and_failure"
)
callback_vars: Dict[str, str]
@model_validator(mode="before")
@ -1111,6 +1138,7 @@ class LiteLLM_TeamTable(TeamBase):
budget_duration: Optional[str] = None
budget_reset_at: Optional[datetime] = None
model_id: Optional[int] = None
team_member_permissions: Optional[List[str]] = None
litellm_model_table: Optional[LiteLLM_ModelTable] = None
created_at: Optional[datetime] = None
@ -1303,9 +1331,9 @@ class ConfigList(LiteLLMPydanticObjectBase):
stored_in_db: Optional[bool]
field_default_value: Any
premium_field: bool = False
nested_fields: Optional[
List[FieldDetail]
] = None # For nested dictionary or Pydantic fields
nested_fields: Optional[List[FieldDetail]] = (
None # For nested dictionary or Pydantic fields
)
class ConfigGeneralSettings(LiteLLMPydanticObjectBase):
@ -1571,9 +1599,9 @@ class LiteLLM_OrganizationMembershipTable(LiteLLMPydanticObjectBase):
budget_id: Optional[str] = None
created_at: datetime
updated_at: datetime
user: Optional[
Any
] = None # You might want to replace 'Any' with a more specific type if available
user: Optional[Any] = (
None # You might want to replace 'Any' with a more specific type if available
)
litellm_budget_table: Optional[LiteLLM_BudgetTable] = None
model_config = ConfigDict(protected_namespaces=())
@ -2157,6 +2185,11 @@ class ProxyErrorTypes(str, enum.Enum):
Cache ping error
"""
team_member_permission_error = "team_member_permission_error"
"""
Team member permission error
"""
@classmethod
def get_model_access_error_type_for_object(
cls, object_type: Literal["key", "user", "team"]
@ -2314,9 +2347,9 @@ class TeamModelDeleteRequest(BaseModel):
# Organization Member Requests
class OrganizationMemberAddRequest(OrgMemberAddRequest):
organization_id: str
max_budget_in_organization: Optional[
float
] = None # Users max budget within the organization
max_budget_in_organization: Optional[float] = (
None # Users max budget within the organization
)
class OrganizationMemberDeleteRequest(MemberDeleteRequest):
@ -2505,9 +2538,9 @@ class ProviderBudgetResponse(LiteLLMPydanticObjectBase):
Maps provider names to their budget configs.
"""
providers: Dict[
str, ProviderBudgetResponseObject
] = {} # Dictionary mapping provider names to their budget configurations
providers: Dict[str, ProviderBudgetResponseObject] = (
{}
) # Dictionary mapping provider names to their budget configurations
class ProxyStateVariables(TypedDict):
@ -2635,9 +2668,9 @@ class LiteLLM_JWTAuth(LiteLLMPydanticObjectBase):
enforce_rbac: bool = False
roles_jwt_field: Optional[str] = None # v2 on role mappings
role_mappings: Optional[List[RoleMapping]] = None
object_id_jwt_field: Optional[
str
] = None # can be either user / team, inferred from the role mapping
object_id_jwt_field: Optional[str] = (
None # can be either user / team, inferred from the role mapping
)
scope_mappings: Optional[List[ScopeMapping]] = None
enforce_scope_based_access: bool = False
enforce_team_based_model_access: bool = False
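Since `KeyManagementRoutes` mixes in `str`, its members compare equal to the raw path strings, which is what lets them be folded into the existing route lists. A small sketch of the resulting behavior (assuming the definitions above):
```python
from litellm.proxy._types import KeyManagementRoutes, LiteLLMRoutes

# str-enum members behave like their path strings
assert KeyManagementRoutes.KEY_GENERATE == "/key/generate"

# key routes are now grouped separately and appended to management_routes
assert KeyManagementRoutes.KEY_LIST in LiteLLMRoutes.key_management_routes.value
assert KeyManagementRoutes.KEY_LIST in LiteLLMRoutes.management_routes.value
```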

View file

@ -1,9 +1,9 @@
# What is this?
## This hook is used to check for LiteLLM managed files in the request body, and replace them with model-specific file id
import base64
import json
import asyncio
import uuid
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union, cast
@ -415,6 +415,7 @@ class _PROXY_LiteLLMManagedFiles(CustomLogger):
[file_id], litellm_parent_otel_span
)
specific_model_file_id_mapping = model_file_id_mapping.get(file_id)
if specific_model_file_id_mapping:
exception_dict = {}
for model_id, file_id in specific_model_file_id_mapping.items():
@ -427,3 +428,4 @@ class _PROXY_LiteLLMManagedFiles(CustomLogger):
)
else:
raise Exception(f"LiteLLM Managed File object with id={file_id} not found")

View file

@ -1,7 +1,7 @@
"""
TEAM MANAGEMENT
All /team management endpoints
All /team management endpoints
/team/new
/team/info
@ -62,6 +62,9 @@ from litellm.proxy.management_endpoints.common_utils import (
_is_user_team_admin,
_set_object_metadata_field,
)
from litellm.proxy.management_helpers.team_member_permission_checks import (
TeamMemberPermissionChecks,
)
from litellm.proxy.management_helpers.utils import (
add_new_member,
management_endpoint_wrapper,
@ -72,6 +75,10 @@ from litellm.proxy.utils import (
handle_exception_on_proxy,
)
from litellm.router import Router
from litellm.types.proxy.management_endpoints.team_endpoints import (
GetTeamMemberPermissionsResponse,
UpdateTeamMemberPermissionsRequest,
)
router = APIRouter()
@ -506,12 +513,12 @@ async def update_team(
updated_kv["model_id"] = _model_id
updated_kv = prisma_client.jsonify_team_object(db_data=updated_kv)
team_row: Optional[
LiteLLM_TeamTable
] = await prisma_client.db.litellm_teamtable.update(
where={"team_id": data.team_id},
data=updated_kv,
include={"litellm_model_table": True}, # type: ignore
team_row: Optional[LiteLLM_TeamTable] = (
await prisma_client.db.litellm_teamtable.update(
where={"team_id": data.team_id},
data=updated_kv,
include={"litellm_model_table": True}, # type: ignore
)
)
if team_row is None or team_row.team_id is None:
@ -1137,10 +1144,10 @@ async def delete_team(
team_rows: List[LiteLLM_TeamTable] = []
for team_id in data.team_ids:
try:
team_row_base: Optional[
BaseModel
] = await prisma_client.db.litellm_teamtable.find_unique(
where={"team_id": team_id}
team_row_base: Optional[BaseModel] = (
await prisma_client.db.litellm_teamtable.find_unique(
where={"team_id": team_id}
)
)
if team_row_base is None:
raise Exception
@ -1298,10 +1305,10 @@ async def team_info(
)
try:
team_info: Optional[
BaseModel
] = await prisma_client.db.litellm_teamtable.find_unique(
where={"team_id": team_id}
team_info: Optional[BaseModel] = (
await prisma_client.db.litellm_teamtable.find_unique(
where={"team_id": team_id}
)
)
if team_info is None:
raise Exception
@ -1926,3 +1933,89 @@ async def team_model_delete(
)
return updated_team
@router.get(
"/team/permissions_list",
tags=["team management"],
dependencies=[Depends(user_api_key_auth)],
)
@management_endpoint_wrapper
async def team_member_permissions(
team_id: str = fastapi.Query(
default=None, description="Team ID in the request parameters"
),
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
) -> GetTeamMemberPermissionsResponse:
"""
Get the team member permissions for a team
"""
from litellm.proxy.proxy_server import prisma_client
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
team_row = await prisma_client.db.litellm_teamtable.find_unique(
where={"team_id": team_id}
)
if team_row is None:
raise HTTPException(
status_code=404,
detail={"error": f"Team not found, passed team_id={team_id}"},
)
team_obj = LiteLLM_TeamTable(**team_row.model_dump())
if team_obj.team_member_permissions is None:
team_obj.team_member_permissions = (
TeamMemberPermissionChecks.default_team_member_permissions()
)
return GetTeamMemberPermissionsResponse(
team_id=team_id,
team_member_permissions=team_obj.team_member_permissions,
all_available_permissions=TeamMemberPermissionChecks.get_all_available_team_member_permissions(),
)
@router.post(
"/team/permissions_update",
tags=["team management"],
dependencies=[Depends(user_api_key_auth)],
)
async def update_team_member_permissions(
data: UpdateTeamMemberPermissionsRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
) -> LiteLLM_TeamTable:
"""
Update the team member permissions for a team
"""
from litellm.proxy.proxy_server import prisma_client
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
team_row = await prisma_client.db.litellm_teamtable.find_unique(
where={"team_id": data.team_id}
)
if team_row is None:
raise HTTPException(
status_code=404,
detail={"error": f"Team not found, passed team_id={data.team_id}"},
)
if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN.value:
raise HTTPException(
status_code=403,
detail={"error": "Only proxy admin can update team member permissions"},
)
# Update the team member permissions
updated_team = await prisma_client.db.litellm_teamtable.update(
where={"team_id": data.team_id},
data={"team_member_permissions": data.team_member_permissions},
)
return updated_team
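A hedged sketch of calling the two new endpoints against a locally running proxy (the key, team id, and permission values are placeholders):
```bash
# read the current team member permissions
curl -X GET 'http://localhost:4000/team/permissions_list?team_id=my-team-id' \
  -H 'x-api-key: sk-1234'

# update them - per the handler above, only a proxy admin key may do this
curl -X POST 'http://localhost:4000/team/permissions_update' \
  -H 'x-api-key: sk-1234' \
  -H 'Content-Type: application/json' \
  -d '{
    "team_id": "my-team-id",
    "team_member_permissions": ["/key/generate", "/key/update", "/key/info"]
  }'
```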

View file

@ -941,7 +941,7 @@ class SSOAuthenticationHandler:
@staticmethod
def _cast_and_deepcopy_litellm_default_team_params(
default_team_params: Union[NewTeamRequest, Dict],
default_team_params: Union[DefaultTeamSSOParams, Dict],
team_request: NewTeamRequest,
litellm_team_id: str,
litellm_team_name: Optional[str] = None,
@ -949,23 +949,20 @@ class SSOAuthenticationHandler:
"""
Casts and deepcopies the litellm.default_team_params to a NewTeamRequest object
- Ensures we create a new NewTeamRequest object
- Handle the case where litellm.default_team_params is a dict or a NewTeamRequest object
- Adds the litellm_team_id and litellm_team_name to the NewTeamRequest object
- Ensures we create a new DefaultTeamSSOParams object
- Handle the case where litellm.default_team_params is a dict or a DefaultTeamSSOParams object
- Adds the litellm_team_id and litellm_team_name to the DefaultTeamSSOParams object
"""
if isinstance(default_team_params, dict):
_team_request = deepcopy(default_team_params)
_team_request["team_id"] = litellm_team_id
_team_request["team_alias"] = litellm_team_name
team_request = NewTeamRequest(**_team_request)
elif isinstance(litellm.default_team_params, NewTeamRequest):
team_request = litellm.default_team_params.model_copy(
deep=True,
update={
"team_id": litellm_team_id,
"team_alias": litellm_team_name,
},
)
elif isinstance(litellm.default_team_params, DefaultTeamSSOParams):
_default_team_params = deepcopy(litellm.default_team_params)
_new_team_request = team_request.model_dump()
_new_team_request.update(_default_team_params)
team_request = NewTeamRequest(**_new_team_request)
return team_request

View file

@ -0,0 +1,181 @@
from typing import List, Optional
from litellm.caching import DualCache
from litellm.proxy._types import (
KeyManagementRoutes,
LiteLLM_TeamTableCachedObj,
LiteLLM_VerificationToken,
LiteLLMRoutes,
LitellmUserRoles,
Member,
ProxyErrorTypes,
ProxyException,
Span,
UserAPIKeyAuth,
)
from litellm.proxy.auth.auth_checks import get_team_object
from litellm.proxy.auth.route_checks import RouteChecks
from litellm.proxy.utils import PrismaClient
DEFAULT_TEAM_MEMBER_PERMISSIONS = [
KeyManagementRoutes.KEY_INFO,
KeyManagementRoutes.KEY_HEALTH,
]
class TeamMemberPermissionChecks:
@staticmethod
def get_permissions_for_team_member(
team_member_object: Member,
team_table: LiteLLM_TeamTableCachedObj,
) -> List[KeyManagementRoutes]:
"""
Returns the permissions for a team member
"""
if team_table.team_member_permissions and isinstance(
team_table.team_member_permissions, list
):
return [
KeyManagementRoutes(permission)
for permission in team_table.team_member_permissions
]
return DEFAULT_TEAM_MEMBER_PERMISSIONS
@staticmethod
def _get_list_of_route_enum_as_str(
route_enum: List[KeyManagementRoutes],
) -> List[str]:
"""
Returns a list of the route enum as a list of strings
"""
return [route.value for route in route_enum]
@staticmethod
async def can_team_member_execute_key_management_endpoint(
user_api_key_dict: UserAPIKeyAuth,
route: KeyManagementRoutes,
prisma_client: PrismaClient,
user_api_key_cache: DualCache,
parent_otel_span: Optional[Span],
existing_key_row: LiteLLM_VerificationToken,
):
"""
Main handler for checking if a team member can update a key
"""
from litellm.proxy.management_endpoints.key_management_endpoints import (
_get_user_in_team,
)
# 1. Don't execute these checks if the user role is proxy admin
if user_api_key_dict.user_role == LitellmUserRoles.PROXY_ADMIN.value:
return
# 2. Check if the operation is being done on a team key
if existing_key_row.team_id is None:
return
# 3. Get Team Object from DB
team_table = await get_team_object(
team_id=existing_key_row.team_id,
prisma_client=prisma_client,
user_api_key_cache=user_api_key_cache,
parent_otel_span=user_api_key_dict.parent_otel_span,
check_db_only=True,
)
# 4. Extract `Member` object from `team_table`
key_assigned_user_in_team = _get_user_in_team(
team_table=team_table, user_id=user_api_key_dict.user_id
)
# 5. Check if the team member has permissions for the endpoint
TeamMemberPermissionChecks.does_team_member_have_permissions_for_endpoint(
team_member_object=key_assigned_user_in_team,
team_table=team_table,
route=route,
)
@staticmethod
def does_team_member_have_permissions_for_endpoint(
team_member_object: Optional[Member],
team_table: LiteLLM_TeamTableCachedObj,
route: str,
) -> Optional[bool]:
"""
Raises an exception if the team member does not have permissions for calling the endpoint for a team
"""
# permission checks only run for non-admin users
# Non-Admin user trying to access information about a team's key
if team_member_object is None:
return False
if team_member_object.role == "admin":
return True
_team_member_permissions = (
TeamMemberPermissionChecks.get_permissions_for_team_member(
team_member_object=team_member_object,
team_table=team_table,
)
)
team_member_permissions = (
TeamMemberPermissionChecks._get_list_of_route_enum_as_str(
_team_member_permissions
)
)
if not RouteChecks.check_route_access(
route=route, allowed_routes=team_member_permissions
):
raise ProxyException(
message=f"Team member does not have permissions for endpoint: {route}. You only have access to the following endpoints: {team_member_permissions}",
type=ProxyErrorTypes.team_member_permission_error,
param=route,
code=401,
)
return True
@staticmethod
async def user_belongs_to_keys_team(
user_api_key_dict: UserAPIKeyAuth,
existing_key_row: LiteLLM_VerificationToken,
) -> bool:
"""
Returns True if the user belongs to the team that the key is assigned to
"""
from litellm.proxy.management_endpoints.key_management_endpoints import (
_get_user_in_team,
)
from litellm.proxy.proxy_server import prisma_client, user_api_key_cache
if existing_key_row.team_id is None:
return False
team_table = await get_team_object(
team_id=existing_key_row.team_id,
prisma_client=prisma_client,
user_api_key_cache=user_api_key_cache,
parent_otel_span=user_api_key_dict.parent_otel_span,
check_db_only=True,
)
# 4. Extract `Member` object from `team_table`
team_member_object = _get_user_in_team(
team_table=team_table, user_id=user_api_key_dict.user_id
)
return team_member_object is not None
@staticmethod
def get_all_available_team_member_permissions() -> List[str]:
"""
Returns all available team member permissions
"""
all_available_permissions = []
for route in LiteLLMRoutes.key_management_routes.value:
all_available_permissions.append(route.value)
return all_available_permissions
@staticmethod
def default_team_member_permissions() -> List[str]:
return [route.value for route in DEFAULT_TEAM_MEMBER_PERMISSIONS]
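When a team has no explicit `team_member_permissions`, the defaults above resolve to the two read-style key routes. A small sketch (import path taken from the team endpoints diff earlier):
```python
from litellm.proxy.management_helpers.team_member_permission_checks import (
    TeamMemberPermissionChecks,
)

# non-admin team members only get read-style key routes by default
assert TeamMemberPermissionChecks.default_team_member_permissions() == [
    "/key/info",
    "/key/health",
]
```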

View file

@ -189,6 +189,51 @@ async def route_create_file(
return response
async def route_create_file(
llm_router: Optional[Router],
_create_file_request: CreateFileRequest,
purpose: OpenAIFilesPurpose,
proxy_logging_obj: ProxyLogging,
user_api_key_dict: UserAPIKeyAuth,
target_model_names_list: List[str],
is_router_model: bool,
router_model: Optional[str],
custom_llm_provider: str,
) -> OpenAIFileObject:
if (
litellm.enable_loadbalancing_on_batch_endpoints is True
and is_router_model
and router_model is not None
):
response = await _deprecated_loadbalanced_create_file(
llm_router=llm_router,
router_model=router_model,
_create_file_request=_create_file_request,
)
elif target_model_names_list:
response = await create_file_for_each_model(
llm_router=llm_router,
_create_file_request=_create_file_request,
target_model_names_list=target_model_names_list,
purpose=purpose,
proxy_logging_obj=proxy_logging_obj,
user_api_key_dict=user_api_key_dict,
)
else:
# get configs for custom_llm_provider
llm_provider_config = get_files_provider_config(
custom_llm_provider=custom_llm_provider
)
if llm_provider_config is not None:
# add llm_provider_config to data
_create_file_request.update(llm_provider_config)
_create_file_request.pop("custom_llm_provider", None) # type: ignore
# for now use custom_llm_provider=="openai" -> this will change as LiteLLM adds more providers for acreate_batch
response = await litellm.acreate_file(**_create_file_request, custom_llm_provider=custom_llm_provider) # type: ignore
return response
@router.post(
"/{provider}/v1/files",
dependencies=[Depends(user_api_key_auth)],

View file

@ -1,5 +1,5 @@
#### CRUD ENDPOINTS for UI Settings #####
from typing import List
from typing import Any, List, Union
from fastapi import APIRouter, Depends, HTTPException
@ -7,6 +7,7 @@ import litellm
from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import *
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.types.proxy.management_endpoints.ui_sso import DefaultTeamSSOParams
router = APIRouter()
@ -111,34 +112,31 @@ async def delete_allowed_ip(ip_address: IPAddress):
return {"message": f"IP {ip_address.ip} deleted successfully", "status": "success"}
@router.get(
"/get/internal_user_settings",
tags=["SSO Settings"],
dependencies=[Depends(user_api_key_auth)],
)
async def get_sso_settings():
async def _get_settings_with_schema(
settings_key: str,
settings_class: Any,
config: dict,
) -> dict:
"""
Get all SSO settings from the litellm_settings configuration.
Returns a structured object with values and descriptions for UI display.
Common utility function to get settings with schema information.
Args:
settings_key: The key in litellm_settings to get
settings_class: The Pydantic class to use for schema
config: The config dictionary
"""
from pydantic import TypeAdapter
from litellm.proxy.proxy_server import proxy_config
# Load existing config
config = await proxy_config.get_config()
litellm_settings = config.get("litellm_settings", {}) or {}
default_internal_user_params = (
litellm_settings.get("default_internal_user_params", {}) or {}
)
settings_data = litellm_settings.get(settings_key, {}) or {}
# Create the settings object first
sso_settings = DefaultInternalUserParams(**(default_internal_user_params))
# Get the schema for UISSOSettings
schema = TypeAdapter(DefaultInternalUserParams).json_schema(by_alias=True)
# Create the settings object
settings = settings_class(**(settings_data))
# Get the schema
schema = TypeAdapter(settings_class).json_schema(by_alias=True)
# Convert to dict for response
settings_dict = sso_settings.model_dump()
settings_dict = settings.model_dump()
# Add descriptions to the response
result = {
@ -166,6 +164,89 @@ async def get_sso_settings():
return result
@router.get(
"/get/internal_user_settings",
tags=["SSO Settings"],
dependencies=[Depends(user_api_key_auth)],
)
async def get_sso_settings():
"""
Get all SSO settings from the litellm_settings configuration.
Returns a structured object with values and descriptions for UI display.
"""
from litellm.proxy.proxy_server import proxy_config
# Load existing config
config = await proxy_config.get_config()
return await _get_settings_with_schema(
settings_key="default_internal_user_params",
settings_class=DefaultInternalUserParams,
config=config,
)
@router.get(
"/get/default_team_settings",
tags=["SSO Settings"],
dependencies=[Depends(user_api_key_auth)],
)
async def get_default_team_settings():
"""
Get all SSO settings from the litellm_settings configuration.
Returns a structured object with values and descriptions for UI display.
"""
from litellm.proxy.proxy_server import proxy_config
# Load existing config
config = await proxy_config.get_config()
return await _get_settings_with_schema(
settings_key="default_team_params",
settings_class=DefaultTeamSSOParams,
config=config,
)
async def _update_litellm_setting(
settings: Union[DefaultInternalUserParams, DefaultTeamSSOParams],
settings_key: str,
in_memory_var: Any,
success_message: str,
):
"""
Common utility function to update `litellm_settings` in both memory and config.
Args:
settings: The settings object to update
settings_key: The key in litellm_settings to update
in_memory_var: The in-memory variable to update
success_message: Message to return on success
"""
from litellm.proxy.proxy_server import proxy_config
# Update the in-memory settings
in_memory_var = settings.model_dump(exclude_none=True)
# Load existing config
config = await proxy_config.get_config()
# Update config with new settings
if "litellm_settings" not in config:
config["litellm_settings"] = {}
config["litellm_settings"][settings_key] = settings.model_dump(exclude_none=True)
# Save the updated config
await proxy_config.save_config(new_config=config)
return {
"message": success_message,
"status": "success",
"settings": in_memory_var,
}
@router.patch(
"/update/internal_user_settings",
tags=["SSO Settings"],
@ -176,27 +257,27 @@ async def update_internal_user_settings(settings: DefaultInternalUserParams):
Update the default internal user parameters for SSO users.
These settings will be applied to new users who sign in via SSO.
"""
from litellm.proxy.proxy_server import proxy_config
# Update the in-memory settings
litellm.default_internal_user_params = settings.model_dump(exclude_none=True)
# Load existing config
config = await proxy_config.get_config()
# Update config with new settings
if "litellm_settings" not in config:
config["litellm_settings"] = {}
config["litellm_settings"]["default_internal_user_params"] = settings.model_dump(
exclude_none=True
return await _update_litellm_setting(
settings=settings,
settings_key="default_internal_user_params",
in_memory_var=litellm.default_internal_user_params,
success_message="Internal user settings updated successfully",
)
# Save the updated config
await proxy_config.save_config(new_config=config)
return {
"message": "Internal user settings updated successfully",
"status": "success",
"settings": litellm.default_internal_user_params,
}
@router.patch(
"/update/default_team_settings",
tags=["SSO Settings"],
dependencies=[Depends(user_api_key_auth)],
)
async def update_default_team_settings(settings: DefaultTeamSSOParams):
"""
Update the default team parameters for SSO users.
These settings will be applied to new teams created from SSO.
"""
return await _update_litellm_setting(
settings=settings,
settings_key="default_team_params",
in_memory_var=litellm.default_team_params,
success_message="Default team settings updated successfully",
)
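A hedged example of exercising the new team-defaults endpoints (the key is a placeholder; the payload fields mirror `DefaultTeamSSOParams`, defined later in this diff):
```bash
# read the defaults currently applied to SSO-created teams
curl -X GET 'http://localhost:4000/get/default_team_settings' \
  -H 'x-api-key: sk-1234'

# update them (values are illustrative)
curl -X PATCH 'http://localhost:4000/update/default_team_settings' \
  -H 'x-api-key: sk-1234' \
  -H 'Content-Type: application/json' \
  -d '{
    "models": ["gpt-4o"],
    "max_budget": 100,
    "budget_duration": "monthly",
    "tpm_limit": 1000,
    "rpm_limit": 100
  }'
```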

View file

@ -339,9 +339,9 @@ class Router:
) # names of models under litellm_params. ex. azure/chatgpt-v-2
self.deployment_latency_map = {}
### CACHING ###
cache_type: Literal[
"local", "redis", "redis-semantic", "s3", "disk"
] = "local" # default to an in-memory cache
cache_type: Literal["local", "redis", "redis-semantic", "s3", "disk"] = (
"local" # default to an in-memory cache
)
redis_cache = None
cache_config: Dict[str, Any] = {}
@ -562,9 +562,9 @@ class Router:
)
)
self.model_group_retry_policy: Optional[
Dict[str, RetryPolicy]
] = model_group_retry_policy
self.model_group_retry_policy: Optional[Dict[str, RetryPolicy]] = (
model_group_retry_policy
)
self.allowed_fails_policy: Optional[AllowedFailsPolicy] = None
if allowed_fails_policy is not None:
@ -619,7 +619,7 @@ class Router:
@staticmethod
def _create_redis_cache(
cache_config: Dict[str, Any]
cache_config: Dict[str, Any],
) -> Union[RedisCache, RedisClusterCache]:
"""
Initializes either a RedisCache or RedisClusterCache based on the cache_config.
@ -1105,9 +1105,9 @@ class Router:
"""
Adds default litellm params to kwargs, if set.
"""
self.default_litellm_params[
metadata_variable_name
] = self.default_litellm_params.pop("metadata", {})
self.default_litellm_params[metadata_variable_name] = (
self.default_litellm_params.pop("metadata", {})
)
for k, v in self.default_litellm_params.items():
if (
k not in kwargs and v is not None
@ -3243,11 +3243,11 @@ class Router:
if isinstance(e, litellm.ContextWindowExceededError):
if context_window_fallbacks is not None:
fallback_model_group: Optional[
List[str]
] = self._get_fallback_model_group_from_fallbacks(
fallbacks=context_window_fallbacks,
model_group=model_group,
fallback_model_group: Optional[List[str]] = (
self._get_fallback_model_group_from_fallbacks(
fallbacks=context_window_fallbacks,
model_group=model_group,
)
)
if fallback_model_group is None:
raise original_exception
@ -3279,11 +3279,11 @@ class Router:
e.message += "\n{}".format(error_message)
elif isinstance(e, litellm.ContentPolicyViolationError):
if content_policy_fallbacks is not None:
fallback_model_group: Optional[
List[str]
] = self._get_fallback_model_group_from_fallbacks(
fallbacks=content_policy_fallbacks,
model_group=model_group,
fallback_model_group: Optional[List[str]] = (
self._get_fallback_model_group_from_fallbacks(
fallbacks=content_policy_fallbacks,
model_group=model_group,
)
)
if fallback_model_group is None:
raise original_exception
@ -5046,6 +5046,11 @@ class Router:
and model_info["supports_web_search"] is True # type: ignore
):
model_group_info.supports_web_search = True
if (
model_info.get("supports_reasoning", None) is not None
and model_info["supports_reasoning"] is True # type: ignore
):
model_group_info.supports_reasoning = True
if (
model_info.get("supported_openai_params", None) is not None
and model_info["supported_openai_params"] is not None

View file

@ -0,0 +1,35 @@
from typing import List, Optional
from pydantic import BaseModel
class GetTeamMemberPermissionsRequest(BaseModel):
"""Request to get the team member permissions for a team"""
team_id: str
class GetTeamMemberPermissionsResponse(BaseModel):
"""Response to get the team member permissions for a team"""
team_id: str
"""
The team id that the permissions are for
"""
team_member_permissions: Optional[List[str]] = []
"""
The team member permissions currently set for the team
"""
all_available_permissions: List[str]
"""
All available team member permissions
"""
class UpdateTeamMemberPermissionsRequest(BaseModel):
"""Request to update the team member permissions for a team"""
team_id: str
team_member_permissions: List[str]

View file

@ -1,4 +1,8 @@
from typing import List, Optional, TypedDict
from typing import List, Literal, Optional, TypedDict
from pydantic import Field
from litellm.proxy._types import LiteLLMPydanticObjectBase, LitellmUserRoles
class MicrosoftGraphAPIUserGroupDirectoryObject(TypedDict, total=False):
@ -25,3 +29,30 @@ class MicrosoftServicePrincipalTeam(TypedDict, total=False):
principalDisplayName: Optional[str]
principalId: Optional[str]
class DefaultTeamSSOParams(LiteLLMPydanticObjectBase):
"""
Default parameters to apply when a new team is automatically created by LiteLLM via SSO Groups
"""
models: List[str] = Field(
default=[],
description="Default list of models that new automatically created teams can access",
)
max_budget: Optional[float] = Field(
default=None,
description="Default maximum budget (in USD) for new automatically created teams",
)
budget_duration: Optional[str] = Field(
default=None,
description="Default budget duration for new automatically created teams (e.g. 'daily', 'weekly', 'monthly')",
)
tpm_limit: Optional[int] = Field(
default=None,
description="Default tpm limit for new automatically created teams",
)
rpm_limit: Optional[int] = Field(
default=None,
description="Default rpm limit for new automatically created teams",
)
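Since `litellm.default_team_params` is read from `litellm_settings` (see the settings endpoints and `litellm/__init__.py` changes above), a hedged sketch of setting these defaults in the proxy `config.yaml` (model names and limits are illustrative):
```yaml
litellm_settings:
  default_team_params:              # fields mirror DefaultTeamSSOParams
    models: ["gpt-4o"]              # models new SSO-created teams can access
    max_budget: 100                 # USD
    budget_duration: "monthly"
    tpm_limit: 1000
    rpm_limit: 100
```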

View file

@ -96,16 +96,18 @@ class ModelInfo(BaseModel):
id: Optional[
str
] # Allow id to be optional on input, but it will always be present as a str in the model instance
db_model: bool = False # used for proxy - to separate models which are stored in the db vs. config.
db_model: bool = (
False # used for proxy - to separate models which are stored in the db vs. config.
)
updated_at: Optional[datetime.datetime] = None
updated_by: Optional[str] = None
created_at: Optional[datetime.datetime] = None
created_by: Optional[str] = None
base_model: Optional[
str
] = None # specify if the base model is azure/gpt-3.5-turbo etc for accurate cost tracking
base_model: Optional[str] = (
None # specify if the base model is azure/gpt-3.5-turbo etc for accurate cost tracking
)
tier: Optional[Literal["free", "paid"]] = None
"""
@ -178,12 +180,12 @@ class GenericLiteLLMParams(CredentialLiteLLMParams, CustomPricingLiteLLMParams):
custom_llm_provider: Optional[str] = None
tpm: Optional[int] = None
rpm: Optional[int] = None
timeout: Optional[
Union[float, str, httpx.Timeout]
] = None # if str, pass in as os.environ/
stream_timeout: Optional[
Union[float, str]
] = None # timeout when making stream=True calls, if str, pass in as os.environ/
timeout: Optional[Union[float, str, httpx.Timeout]] = (
None # if str, pass in as os.environ/
)
stream_timeout: Optional[Union[float, str]] = (
None # timeout when making stream=True calls, if str, pass in as os.environ/
)
max_retries: Optional[int] = None
organization: Optional[str] = None # for openai orgs
configurable_clientside_auth_params: CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS = None
@ -253,9 +255,9 @@ class GenericLiteLLMParams(CredentialLiteLLMParams, CustomPricingLiteLLMParams):
if max_retries is not None and isinstance(max_retries, str):
max_retries = int(max_retries) # cast to int
# We need to keep max_retries in args since it's a parameter of GenericLiteLLMParams
args[
"max_retries"
] = max_retries # Put max_retries back in args after popping it
args["max_retries"] = (
max_retries # Put max_retries back in args after popping it
)
super().__init__(**args, **params)
def __contains__(self, key):
@ -562,6 +564,7 @@ class ModelGroupInfo(BaseModel):
supports_parallel_function_calling: bool = Field(default=False)
supports_vision: bool = Field(default=False)
supports_web_search: bool = Field(default=False)
supports_reasoning: bool = Field(default=False)
supports_function_calling: bool = Field(default=False)
supported_openai_params: Optional[List[str]] = Field(default=[])
configurable_clientside_auth_params: CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS = None

View file

@ -102,6 +102,7 @@ class ProviderSpecificModelInfo(TypedDict, total=False):
supports_native_streaming: Optional[bool]
supports_parallel_function_calling: Optional[bool]
supports_web_search: Optional[bool]
supports_reasoning: Optional[bool]
class SearchContextCostPerQuery(TypedDict, total=False):

View file

@ -484,7 +484,7 @@ def load_credentials_from_list(kwargs: dict):
def get_dynamic_callbacks(
dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]]
dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]],
) -> List:
returned_callbacks = litellm.callbacks.copy()
if dynamic_callbacks:
@ -516,9 +516,9 @@ def function_setup( # noqa: PLR0915
function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None
## DYNAMIC CALLBACKS ##
dynamic_callbacks: Optional[
List[Union[str, Callable, CustomLogger]]
] = kwargs.pop("callbacks", None)
dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = (
kwargs.pop("callbacks", None)
)
all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks)
if len(all_callbacks) > 0:
@ -1202,9 +1202,9 @@ def client(original_function): # noqa: PLR0915
exception=e,
retry_policy=kwargs.get("retry_policy"),
)
kwargs[
"retry_policy"
] = reset_retry_policy() # prevent infinite loops
kwargs["retry_policy"] = (
reset_retry_policy()
) # prevent infinite loops
litellm.num_retries = (
None # set retries to None to prevent infinite loops
)
@ -2229,6 +2229,15 @@ def supports_vision(model: str, custom_llm_provider: Optional[str] = None) -> bo
)
def supports_reasoning(model: str, custom_llm_provider: Optional[str] = None) -> bool:
"""
Check if the given model supports reasoning and return a boolean value.
"""
return _supports_factory(
model=model, custom_llm_provider=custom_llm_provider, key="supports_reasoning"
)
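For reference, a minimal usage sketch of this new helper (model names mirror the unit tests added later in this diff; results depend on the loaded model cost map):

```python
import litellm

# expected True for models flagged with supports_reasoning in the updated model map,
# False otherwise (per the parametrized tests further down in this diff)
print(litellm.supports_reasoning(model="openai/o3-mini"))        # True
print(litellm.supports_reasoning(model="xai/grok-3-mini-beta"))  # True
print(litellm.supports_reasoning(model="gpt-3.5-turbo"))         # False
```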
def supports_embedding_image_input(
model: str, custom_llm_provider: Optional[str] = None
) -> bool:
@ -3004,16 +3013,16 @@ def get_optional_params( # noqa: PLR0915
True # so that main.py adds the function call to the prompt
)
if "tools" in non_default_params:
optional_params[
"functions_unsupported_model"
] = non_default_params.pop("tools")
optional_params["functions_unsupported_model"] = (
non_default_params.pop("tools")
)
non_default_params.pop(
"tool_choice", None
) # causes ollama requests to hang
elif "functions" in non_default_params:
optional_params[
"functions_unsupported_model"
] = non_default_params.pop("functions")
optional_params["functions_unsupported_model"] = (
non_default_params.pop("functions")
)
elif (
litellm.add_function_to_prompt
): # if user opts to add it to prompt instead
@ -3036,10 +3045,10 @@ def get_optional_params( # noqa: PLR0915
if "response_format" in non_default_params:
if provider_config is not None:
non_default_params[
"response_format"
] = provider_config.get_json_schema_from_pydantic_object(
response_format=non_default_params["response_format"]
non_default_params["response_format"] = (
provider_config.get_json_schema_from_pydantic_object(
response_format=non_default_params["response_format"]
)
)
else:
non_default_params["response_format"] = type_to_response_format_param(
@ -4055,9 +4064,9 @@ def _count_characters(text: str) -> int:
def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream]) -> str:
_choices: Union[
List[Union[Choices, StreamingChoices]], List[StreamingChoices]
] = response_obj.choices
_choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = (
response_obj.choices
)
response_str = ""
for choice in _choices:
@ -4597,6 +4606,7 @@ def _get_model_info_helper( # noqa: PLR0915
"supports_native_streaming", None
),
supports_web_search=_model_info.get("supports_web_search", False),
supports_reasoning=_model_info.get("supports_reasoning", False),
search_context_cost_per_query=_model_info.get(
"search_context_cost_per_query", None
),
@ -4669,6 +4679,7 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
supports_audio_output: Optional[bool]
supports_pdf_input: Optional[bool]
supports_web_search: Optional[bool]
supports_reasoning: Optional[bool]
Raises:
Exception: If the model is not mapped yet.
@ -6188,7 +6199,7 @@ def validate_chat_completion_user_messages(messages: List[AllMessageValues]):
def validate_chat_completion_tool_choice(
tool_choice: Optional[Union[dict, str]]
tool_choice: Optional[Union[dict, str]],
) -> Optional[Union[dict, str]]:
"""
Confirm the tool choice is passed in the OpenAI format.

View file

@ -15,6 +15,7 @@
"supports_prompt_caching": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_reasoning": true,
"supports_web_search": true,
"search_context_cost_per_query": {
"search_context_size_low": 0.0000,
@ -379,6 +380,7 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_native_streaming": false,
"supports_reasoning": true,
"supported_modalities": ["text", "image"],
"supported_output_modalities": ["text"],
"supported_endpoints": ["/v1/responses", "/v1/batch"]
@ -401,6 +403,7 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_native_streaming": false,
"supports_reasoning": true,
"supported_modalities": ["text", "image"],
"supported_output_modalities": ["text"],
"supported_endpoints": ["/v1/responses", "/v1/batch"]
@ -420,6 +423,7 @@
"supports_prompt_caching": true,
"supports_system_messages": true,
"supports_response_schema": true,
"supports_reasoning": true,
"supports_tool_choice": true
},
"o1-mini": {
@ -448,6 +452,7 @@
"supports_vision": false,
"supports_prompt_caching": true,
"supports_response_schema": true,
"supports_reasoning": true,
"supports_tool_choice": true
},
"o3-mini-2025-01-31": {
@ -464,6 +469,7 @@
"supports_vision": false,
"supports_prompt_caching": true,
"supports_response_schema": true,
"supports_reasoning": true,
"supports_tool_choice": true
},
"o1-mini-2024-09-12": {
@ -476,6 +482,7 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_vision": true,
"supports_reasoning": true,
"supports_prompt_caching": true
},
"o1-preview": {
@ -488,6 +495,7 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_vision": true,
"supports_reasoning": true,
"supports_prompt_caching": true
},
"o1-preview-2024-09-12": {
@ -500,6 +508,7 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_vision": true,
"supports_reasoning": true,
"supports_prompt_caching": true
},
"o1-2024-12-17": {
@ -517,6 +526,7 @@
"supports_prompt_caching": true,
"supports_system_messages": true,
"supports_response_schema": true,
"supports_reasoning": true,
"supports_tool_choice": true
},
"chatgpt-4o-latest": {
@ -1416,6 +1426,7 @@
"cache_read_input_token_cost": 0.00000055,
"litellm_provider": "azure",
"mode": "chat",
"supports_reasoning": true,
"supports_vision": false,
"supports_prompt_caching": true,
"supports_tool_choice": true
@ -1432,6 +1443,7 @@
"litellm_provider": "azure",
"mode": "chat",
"supports_vision": false,
"supports_reasoning": true,
"supports_prompt_caching": true,
"supports_tool_choice": true
},
@ -1447,6 +1459,7 @@
"litellm_provider": "azure",
"mode": "chat",
"supports_vision": false,
"supports_reasoning": true,
"supports_prompt_caching": true,
"supports_tool_choice": true
},
@ -1477,6 +1490,7 @@
"mode": "chat",
"supports_vision": false,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_tool_choice": true
},
@ -1492,6 +1506,7 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_reasoning": true,
"supports_prompt_caching": true
},
"azure/o1-mini-2024-09-12": {
@ -1506,6 +1521,7 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_reasoning": true,
"supports_prompt_caching": true
},
"azure/us/o1-mini-2024-09-12": {
@ -1552,6 +1568,7 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true,
"supports_reasoning": true,
"supports_prompt_caching": true,
"supports_tool_choice": true
},
@ -1567,6 +1584,7 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true,
"supports_reasoning": true,
"supports_prompt_caching": true,
"supports_tool_choice": true
},
@ -1612,6 +1630,7 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_reasoning": true,
"supports_prompt_caching": true
},
"azure/o1-preview-2024-09-12": {
@ -1626,6 +1645,7 @@
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_reasoning": true,
"supports_prompt_caching": true
},
"azure/us/o1-preview-2024-09-12": {
@ -2284,6 +2304,7 @@
"litellm_provider": "azure_ai",
"mode": "chat",
"supports_tool_choice": true,
"supports_reasoning": true,
"source": "https://techcommunity.microsoft.com/blog/machinelearningblog/deepseek-r1-improved-performance-higher-limits-and-transparent-pricing/4386367"
},
"azure_ai/deepseek-v3": {
@ -2984,6 +3005,7 @@
"supports_function_calling": true,
"supports_assistant_prefill": true,
"supports_tool_choice": true,
"supports_reasoning": true,
"supports_prompt_caching": true
},
"deepseek/deepseek-chat": {
@ -3097,6 +3119,87 @@
"supports_vision": true,
"supports_tool_choice": true
},
"xai/grok-3-beta": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015,
"litellm_provider": "xai",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": true,
"supports_response_schema": false,
"source": "https://x.ai/api#pricing"
},
"xai/grok-3-fast-beta": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000025,
"litellm_provider": "xai",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": true,
"supports_response_schema": false,
"source": "https://x.ai/api#pricing"
},
"xai/grok-3-fast-latest": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000025,
"litellm_provider": "xai",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": true,
"supports_response_schema": false,
"source": "https://x.ai/api#pricing"
},
"xai/grok-3-mini-beta": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.0000003,
"output_cost_per_token": 0.0000005,
"litellm_provider": "xai",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": true,
"supports_reasoning": true,
"supports_response_schema": false,
"source": "https://x.ai/api#pricing"
},
"xai/grok-3-mini-fast-beta": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.0000006,
"output_cost_per_token": 0.000004,
"litellm_provider": "xai",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": true,
"supports_reasoning": true,
"supports_response_schema": false,
"source": "https://x.ai/api#pricing"
},
"xai/grok-3-mini-fast-latest": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.0000006,
"output_cost_per_token": 0.000004,
"litellm_provider": "xai",
"mode": "chat",
"supports_reasoning": true,
"supports_function_calling": true,
"supports_tool_choice": true,
"supports_response_schema": false,
"source": "https://x.ai/api#pricing"
},
"xai/grok-vision-beta": {
"max_tokens": 8192,
"max_input_tokens": 8192,
@ -3167,6 +3270,7 @@
"mode": "chat",
"supports_system_messages": false,
"supports_function_calling": false,
"supports_reasoning": true,
"supports_response_schema": false,
"supports_tool_choice": true
},
@ -3678,7 +3782,8 @@
"supports_prompt_caching": true,
"supports_response_schema": true,
"deprecation_date": "2025-06-01",
"supports_tool_choice": true
"supports_tool_choice": true,
"supports_reasoning": true
},
"claude-3-7-sonnet-20250219": {
"max_tokens": 128000,
@ -3698,7 +3803,8 @@
"supports_prompt_caching": true,
"supports_response_schema": true,
"deprecation_date": "2026-02-01",
"supports_tool_choice": true
"supports_tool_choice": true,
"supports_reasoning": true
},
"claude-3-5-sonnet-20241022": {
"max_tokens": 8192,
@ -5276,6 +5382,7 @@
"supports_prompt_caching": true,
"supports_response_schema": true,
"deprecation_date": "2025-06-01",
"supports_reasoning": true,
"supports_tool_choice": true
},
"vertex_ai/claude-3-haiku": {
@ -6593,6 +6700,7 @@
"mode": "chat",
"supports_function_calling": true,
"supports_assistant_prefill": true,
"supports_reasoning": true,
"supports_tool_choice": true,
"supports_prompt_caching": true
},
@ -6768,6 +6876,7 @@
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"supports_reasoning": true,
"tool_use_system_prompt_tokens": 159,
"supports_assistant_prefill": true,
"supports_tool_choice": true
@ -6783,6 +6892,7 @@
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"supports_reasoning": true,
"tool_use_system_prompt_tokens": 159,
"supports_tool_choice": true
},
@ -6950,6 +7060,7 @@
"litellm_provider": "openrouter",
"mode": "chat",
"supports_function_calling": true,
"supports_reasoning": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_tool_choice": true
@ -6963,6 +7074,7 @@
"litellm_provider": "openrouter",
"mode": "chat",
"supports_function_calling": true,
"supports_reasoning": true,
"supports_parallel_function_calling": true,
"supports_vision": false,
"supports_tool_choice": true
@ -7780,6 +7892,7 @@
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
"supports_reasoning": true,
"supports_tool_choice": true
},
"anthropic.claude-3-5-sonnet-20241022-v2:0": {
@ -7897,7 +8010,8 @@
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
"supports_tool_choice": true
"supports_tool_choice": true,
"supports_reasoning": true
},
"us.anthropic.claude-3-haiku-20240307-v1:0": {
"max_tokens": 4096,
@ -8656,6 +8770,7 @@
"output_cost_per_token": 0.0000054,
"litellm_provider": "bedrock_converse",
"mode": "chat",
"supports_reasoning": true,
"supports_function_calling": false,
"supports_tool_choice": false
@ -10482,7 +10597,8 @@
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Claude 3.7 conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_tool_choice": true
"supports_tool_choice": true,
"supports_reasoning": true
},
"databricks/databricks-meta-llama-3-1-405b-instruct": {
"max_tokens": 128000,
@ -10740,6 +10856,7 @@
"max_input_tokens": 32768,
"max_output_tokens": 8192,
"litellm_provider": "snowflake",
"supports_reasoning": true,
"mode": "chat"
},
"snowflake/snowflake-arctic": {

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.65.7"
version = "1.65.8"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -118,7 +118,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.65.7"
version = "1.65.8"
version_files = [
"pyproject.toml:^version"
]

View file

@ -0,0 +1,32 @@
import json
import os
import sys
import pytest
from fastapi.testclient import TestClient
sys.path.insert(
0, os.path.abspath("../../../..")
) # Adds the parent directory to the system path
from unittest.mock import MagicMock, patch
from litellm.llms.databricks.common_utils import DatabricksBase
def test_databricks_validate_environment():
databricks_base = DatabricksBase()
with patch.object(
databricks_base, "_get_databricks_credentials"
) as mock_get_credentials:
try:
databricks_base.databricks_validate_environment(
api_key=None,
api_base="my_api_base",
endpoint_type="chat_completions",
custom_endpoint=False,
headers=None,
)
except Exception:
pass
mock_get_credentials.assert_called_once()

View file

@ -0,0 +1,161 @@
import asyncio
import json
import os
import sys
import uuid
from typing import Optional, cast
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from fastapi import HTTPException
from fastapi.testclient import TestClient
sys.path.insert(
0, os.path.abspath("../../../")
) # Adds the parent directory to the system path
from litellm.proxy._types import UserAPIKeyAuth # Import UserAPIKeyAuth
from litellm.proxy._types import LiteLLM_TeamTable, LitellmUserRoles
from litellm.proxy.management_endpoints.team_endpoints import (
user_api_key_auth, # Assuming this dependency is needed
)
from litellm.proxy.management_endpoints.team_endpoints import (
GetTeamMemberPermissionsResponse,
UpdateTeamMemberPermissionsRequest,
router,
)
from litellm.proxy.management_helpers.team_member_permission_checks import (
TeamMemberPermissionChecks,
)
from litellm.proxy.proxy_server import app
# Setup TestClient
client = TestClient(app)
# Mock prisma_client
mock_prisma_client = MagicMock()
# Fixture to provide the mock prisma client
@pytest.fixture(autouse=True)
def mock_db_client():
with patch(
"litellm.proxy.proxy_server.prisma_client", mock_prisma_client
): # Mock in both places if necessary
yield mock_prisma_client
mock_prisma_client.reset_mock()
# Fixture to provide a mock admin user auth object
@pytest.fixture
def mock_admin_auth():
mock_auth = UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN)
return mock_auth
# Test for /team/permissions_list endpoint (GET)
@pytest.mark.asyncio
async def test_get_team_permissions_list_success(mock_db_client, mock_admin_auth):
"""
Test successful retrieval of team member permissions.
"""
test_team_id = "test-team-123"
mock_team_data = {
"team_id": test_team_id,
"team_alias": "Test Team",
"team_member_permissions": ["/key/generate", "/key/update"],
"spend": 0.0,
}
mock_team_row = MagicMock()
mock_team_row.model_dump.return_value = mock_team_data
mock_db_client.db.litellm_teamtable.find_unique = AsyncMock(
return_value=mock_team_row
)
# Override the dependency for this test
app.dependency_overrides[user_api_key_auth] = lambda: mock_admin_auth
response = client.get(f"/team/permissions_list?team_id={test_team_id}")
assert response.status_code == 200
response_data = response.json()
assert response_data["team_id"] == test_team_id
assert (
response_data["team_member_permissions"]
== mock_team_data["team_member_permissions"]
)
assert (
response_data["all_available_permissions"]
== TeamMemberPermissionChecks.get_all_available_team_member_permissions()
)
mock_db_client.db.litellm_teamtable.find_unique.assert_awaited_once_with(
where={"team_id": test_team_id}
)
# Clean up dependency override
app.dependency_overrides = {}
# Test for /team/permissions_update endpoint (POST)
@pytest.mark.asyncio
async def test_update_team_permissions_success(mock_db_client, mock_admin_auth):
"""
Test successful update of team member permissions by an admin.
"""
test_team_id = "test-team-456"
update_payload = {
"team_id": test_team_id,
"team_member_permissions": ["/key/generate", "/key/update"],
}
mock_existing_team_data = {
"team_id": test_team_id,
"team_alias": "Existing Team",
"team_member_permissions": ["/key/list"],
"spend": 0.0,
"models": [],
}
mock_updated_team_data = {
**mock_existing_team_data,
"team_member_permissions": update_payload["team_member_permissions"],
}
mock_existing_team_row = MagicMock(spec=LiteLLM_TeamTable)
mock_existing_team_row.model_dump.return_value = mock_existing_team_data
# Set attributes directly if model_dump isn't enough for LiteLLM_TeamTable usage
for key, value in mock_existing_team_data.items():
setattr(mock_existing_team_row, key, value)
mock_updated_team_row = MagicMock(spec=LiteLLM_TeamTable)
mock_updated_team_row.model_dump.return_value = mock_updated_team_data
# Set attributes directly if model_dump isn't enough for LiteLLM_TeamTable usage
for key, value in mock_updated_team_data.items():
setattr(mock_updated_team_row, key, value)
mock_db_client.db.litellm_teamtable.find_unique = AsyncMock(
return_value=mock_existing_team_row
)
mock_db_client.db.litellm_teamtable.update = AsyncMock(
return_value=mock_updated_team_row
)
# Override the dependency for this test
app.dependency_overrides[user_api_key_auth] = lambda: mock_admin_auth
response = client.post("/team/permissions_update", json=update_payload)
assert response.status_code == 200
response_data = response.json()
# Use model_dump for comparison if the endpoint returns the Prisma model directly
assert response_data == mock_updated_team_row.model_dump()
mock_db_client.db.litellm_teamtable.find_unique.assert_awaited_once_with(
where={"team_id": test_team_id}
)
mock_db_client.db.litellm_teamtable.update.assert_awaited_once_with(
where={"team_id": test_team_id},
data={"team_member_permissions": update_payload["team_member_permissions"]},
)
# Clean up dependency override
app.dependency_overrides = {}
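Outside the test client, the same endpoints could be exercised against a running proxy roughly as follows. This is a sketch only: the base URL and admin key are placeholders, and the routes and payloads simply mirror the tests above.

```python
import requests

BASE_URL = "http://0.0.0.0:4000"               # placeholder proxy address
HEADERS = {"Authorization": "Bearer sk-1234"}  # placeholder admin key

# GET the team's current and all available member permissions
resp = requests.get(
    f"{BASE_URL}/team/permissions_list",
    params={"team_id": "test-team-123"},
    headers=HEADERS,
)
print(resp.json()["team_member_permissions"])

# POST an updated permission list for the team
resp = requests.post(
    f"{BASE_URL}/team/permissions_update",
    json={
        "team_id": "test-team-123",
        "team_member_permissions": ["/key/generate", "/key/update"],
    },
    headers=HEADERS,
)
print(resp.status_code)
```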

View file

@ -19,6 +19,7 @@ from litellm.proxy._types import NewTeamRequest
from litellm.proxy.auth.handle_jwt import JWTHandler
from litellm.proxy.management_endpoints.types import CustomOpenID
from litellm.proxy.management_endpoints.ui_sso import (
DefaultTeamSSOParams,
GoogleSSOHandler,
MicrosoftSSOHandler,
SSOAuthenticationHandler,
@ -421,8 +422,10 @@ def test_get_group_ids_from_graph_api_response():
@pytest.mark.parametrize(
"team_params",
[
# Test case 1: Using NewTeamRequest
NewTeamRequest(max_budget=10, budget_duration="1d", models=["special-gpt-5"]),
# Test case 1: Using DefaultTeamSSOParams
DefaultTeamSSOParams(
max_budget=10, budget_duration="1d", models=["special-gpt-5"]
),
# Test case 2: Using Dict
{"max_budget": 10, "budget_duration": "1d", "models": ["special-gpt-5"]},
],

View file

@ -457,7 +457,7 @@ class TestSpendLogsPayload:
"model": "gpt-4o",
"user": "",
"team_id": "",
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 20, "prompt_tokens": 10, "total_tokens": 30, "completion_tokens_details": null, "prompt_tokens_details": null}, "model_map_information": {"model_map_key": "gpt-4o", "model_map_value": {"key": "gpt-4o", "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, "input_cost_per_token": 2.5e-06, "cache_creation_input_token_cost": null, "cache_read_input_token_cost": 1.25e-06, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_token_above_200k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": 1.25e-06, "output_cost_per_token_batches": 5e-06, "output_cost_per_token": 1e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_token_above_200k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "openai", "mode": "chat", "supports_system_messages": true, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": false, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": false, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": true, "search_context_cost_per_query": {"search_context_size_low": 0.03, "search_context_size_medium": 0.035, "search_context_size_high": 0.05}, "tpm": null, "rpm": null, "supported_openai_params": ["frequency_penalty", "logit_bias", "logprobs", "top_logprobs", "max_tokens", "max_completion_tokens", "modalities", "prediction", "n", "presence_penalty", "seed", "stop", "stream", "stream_options", "temperature", "top_p", "tools", "tool_choice", "function_call", "functions", "max_retries", "extra_headers", "parallel_tool_calls", "audio", "response_format", "user"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": null}}',
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 20, "prompt_tokens": 10, "total_tokens": 30, "completion_tokens_details": null, "prompt_tokens_details": null}, "model_map_information": {"model_map_key": "gpt-4o", "model_map_value": {"key": "gpt-4o", "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, "input_cost_per_token": 2.5e-06, "cache_creation_input_token_cost": null, "cache_read_input_token_cost": 1.25e-06, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_token_above_200k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": 1.25e-06, "output_cost_per_token_batches": 5e-06, "output_cost_per_token": 1e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_token_above_200k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "openai", "mode": "chat", "supports_system_messages": true, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": false, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": false, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": true, "supports_reasoning": false, "search_context_cost_per_query": {"search_context_size_low": 0.03, "search_context_size_medium": 0.035, "search_context_size_high": 0.05}, "tpm": null, "rpm": null, "supported_openai_params": ["frequency_penalty", "logit_bias", "logprobs", "top_logprobs", "max_tokens", "max_completion_tokens", "modalities", "prediction", "n", "presence_penalty", "seed", "stop", "stream", "stream_options", "temperature", "top_p", "tools", "tool_choice", "function_call", "functions", "max_retries", "extra_headers", "parallel_tool_calls", "audio", "response_format", "user"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": null}}',
"cache_key": "Cache OFF",
"spend": 0.00022500000000000002,
"total_tokens": 30,
@ -555,7 +555,7 @@ class TestSpendLogsPayload:
"model": "claude-3-7-sonnet-20250219",
"user": "",
"team_id": "",
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 503, "prompt_tokens": 2095, "total_tokens": 2598, "completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}, "model_map_information": {"model_map_key": "claude-3-7-sonnet-20250219", "model_map_value": {"key": "claude-3-7-sonnet-20250219", "max_tokens": 128000, "max_input_tokens": 200000, "max_output_tokens": 128000, "input_cost_per_token": 3e-06, "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_token_above_200k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": null, "output_cost_per_token_batches": null, "output_cost_per_token": 1.5e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_token_above_200k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "anthropic", "mode": "chat", "supports_system_messages": null, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": true, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": false, "search_context_cost_per_query": null, "tpm": null, "rpm": null, "supported_openai_params": ["stream", "stop", "temperature", "top_p", "max_tokens", "max_completion_tokens", "tools", "tool_choice", "extra_headers", "parallel_tool_calls", "response_format", "user", "reasoning_effort", "thinking"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}',
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 503, "prompt_tokens": 2095, "total_tokens": 2598, "completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}, "model_map_information": {"model_map_key": "claude-3-7-sonnet-20250219", "model_map_value": {"key": "claude-3-7-sonnet-20250219", "max_tokens": 128000, "max_input_tokens": 200000, "max_output_tokens": 128000, "input_cost_per_token": 3e-06, "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_token_above_200k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": null, "output_cost_per_token_batches": null, "output_cost_per_token": 1.5e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_token_above_200k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "anthropic", "mode": "chat", "supports_system_messages": null, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": true, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": false, "supports_reasoning": true, "search_context_cost_per_query": null, "tpm": null, "rpm": null, "supported_openai_params": ["stream", "stop", "temperature", "top_p", "max_tokens", "max_completion_tokens", "tools", "tool_choice", "extra_headers", "parallel_tool_calls", "response_format", "user", "reasoning_effort", "thinking"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}',
"cache_key": "Cache OFF",
"spend": 0.01383,
"total_tokens": 2598,
@ -651,7 +651,7 @@ class TestSpendLogsPayload:
"model": "claude-3-7-sonnet-20250219",
"user": "",
"team_id": "",
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 503, "prompt_tokens": 2095, "total_tokens": 2598, "completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}, "model_map_information": {"model_map_key": "claude-3-7-sonnet-20250219", "model_map_value": {"key": "claude-3-7-sonnet-20250219", "max_tokens": 128000, "max_input_tokens": 200000, "max_output_tokens": 128000, "input_cost_per_token": 3e-06, "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_token_above_200k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": null, "output_cost_per_token_batches": null, "output_cost_per_token": 1.5e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_token_above_200k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "anthropic", "mode": "chat", "supports_system_messages": null, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": true, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": false, "search_context_cost_per_query": null, "tpm": null, "rpm": null, "supported_openai_params": ["stream", "stop", "temperature", "top_p", "max_tokens", "max_completion_tokens", "tools", "tool_choice", "extra_headers", "parallel_tool_calls", "response_format", "user", "reasoning_effort", "thinking"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}',
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 503, "prompt_tokens": 2095, "total_tokens": 2598, "completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}, "model_map_information": {"model_map_key": "claude-3-7-sonnet-20250219", "model_map_value": {"key": "claude-3-7-sonnet-20250219", "max_tokens": 128000, "max_input_tokens": 200000, "max_output_tokens": 128000, "input_cost_per_token": 3e-06, "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_token_above_200k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": null, "output_cost_per_token_batches": null, "output_cost_per_token": 1.5e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_token_above_200k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "anthropic", "mode": "chat", "supports_system_messages": null, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": true, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": false, "supports_reasoning": true, "search_context_cost_per_query": null, "tpm": null, "rpm": null, "supported_openai_params": ["stream", "stop", "temperature", "top_p", "max_tokens", "max_completion_tokens", "tools", "tool_choice", "extra_headers", "parallel_tool_calls", "response_format", "user", "reasoning_effort", "thinking"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}',
"cache_key": "Cache OFF",
"spend": 0.01383,
"total_tokens": 2598,

View file

@ -11,6 +11,7 @@ sys.path.insert(
from litellm.proxy._types import DefaultInternalUserParams, LitellmUserRoles
from litellm.proxy.proxy_server import app
from litellm.types.proxy.management_endpoints.ui_sso import DefaultTeamSSOParams
client = TestClient(app)
@ -25,7 +26,14 @@ def mock_proxy_config(monkeypatch):
"max_budget": 100.0,
"budget_duration": "30d",
"models": ["gpt-3.5-turbo", "gpt-4"],
}
},
"default_team_params": {
"models": ["gpt-3.5-turbo"],
"max_budget": 50.0,
"budget_duration": "14d",
"tpm_limit": 100,
"rpm_limit": 10,
},
}
}
@ -138,3 +146,76 @@ class TestProxySettingEndpoints:
# Verify save_config was called exactly once
assert mock_proxy_config["save_call_count"]() == 1
def test_get_default_team_settings(self, mock_proxy_config, mock_auth):
"""Test getting the default team settings"""
response = client.get("/get/default_team_settings")
assert response.status_code == 200
data = response.json()
# Check structure of response
assert "values" in data
assert "schema" in data
# Check values match our mock config
values = data["values"]
mock_params = mock_proxy_config["config"]["litellm_settings"][
"default_team_params"
]
assert values["models"] == mock_params["models"]
assert values["max_budget"] == mock_params["max_budget"]
assert values["budget_duration"] == mock_params["budget_duration"]
assert values["tpm_limit"] == mock_params["tpm_limit"]
assert values["rpm_limit"] == mock_params["rpm_limit"]
# Check schema contains descriptions
assert "properties" in data["schema"]
assert "models" in data["schema"]["properties"]
assert "description" in data["schema"]["properties"]["models"]
def test_update_default_team_settings(
self, mock_proxy_config, mock_auth, monkeypatch
):
"""Test updating the default team settings"""
# Mock litellm.default_team_params
import litellm
monkeypatch.setattr(litellm, "default_team_params", {})
# New settings to update
new_settings = {
"models": ["gpt-4", "claude-3"],
"max_budget": 150.0,
"budget_duration": "30d",
"tpm_limit": 200,
"rpm_limit": 20,
}
response = client.patch("/update/default_team_settings", json=new_settings)
assert response.status_code == 200
data = response.json()
# Check response structure
assert data["status"] == "success"
assert "settings" in data
# Verify settings were updated
settings = data["settings"]
assert settings["models"] == new_settings["models"]
assert settings["max_budget"] == new_settings["max_budget"]
assert settings["budget_duration"] == new_settings["budget_duration"]
assert settings["tpm_limit"] == new_settings["tpm_limit"]
assert settings["rpm_limit"] == new_settings["rpm_limit"]
# Verify the config was updated
updated_config = mock_proxy_config["config"]["litellm_settings"][
"default_team_params"
]
assert updated_config["models"] == new_settings["models"]
assert updated_config["max_budget"] == new_settings["max_budget"]
assert updated_config["tpm_limit"] == new_settings["tpm_limit"]
# Verify save_config was called exactly once
assert mock_proxy_config["save_call_count"]() == 1
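The default-team-settings endpoints follow the same pattern; a rough sketch with placeholder URL and key, using the methods and paths exercised by the tests above:

```python
import requests

BASE_URL = "http://0.0.0.0:4000"               # placeholder
HEADERS = {"Authorization": "Bearer sk-1234"}  # placeholder admin key

# read current defaults plus the field schema
current = requests.get(f"{BASE_URL}/get/default_team_settings", headers=HEADERS).json()
print(current["values"])

# patch new defaults
updated = requests.patch(
    f"{BASE_URL}/update/default_team_settings",
    json={"models": ["gpt-4"], "max_budget": 150.0, "budget_duration": "30d"},
    headers=HEADERS,
)
print(updated.json()["status"])
```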

View file

@ -514,6 +514,26 @@ def test_supports_web_search(model, expected_bool):
pytest.fail(f"Error occurred: {e}")
@pytest.mark.parametrize(
"model, expected_bool",
[
("openai/o3-mini", True),
("o3-mini", True),
("xai/grok-3-mini-beta", True),
("xai/grok-3-mini-fast-beta", True),
("xai/grok-2", False),
("gpt-3.5-turbo", False),
],
)
def test_supports_reasoning(model, expected_bool):
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
try:
assert litellm.supports_reasoning(model=model) == expected_bool
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_get_max_token_unit_test():
"""
More complete testing in `test_completion_cost.py`

View file

@ -23,7 +23,7 @@ from litellm.utils import (
)
from litellm.main import stream_chunk_builder
from typing import Union
from litellm.types.utils import Usage, ModelResponse
# test_example.py
from abc import ABC, abstractmethod
from openai import OpenAI
@ -1398,4 +1398,77 @@ class BaseAnthropicChatTest(ABC):
)
assert optional_params["thinking"] == {"type": "enabled", "budget_tokens": 4096}
assert "reasoning_effort" not in optional_params
assert "reasoning_effort" not in optional_params
class BaseReasoningLLMTests(ABC):
"""
Base class for testing reasoning LLMs
- test that the responses contain reasoning_content
- test that the usage contains reasoning_tokens
"""
@abstractmethod
def get_base_completion_call_args(self) -> dict:
"""Must return the base completion call args"""
pass
@property
def completion_function(self):
return litellm.completion
def test_non_streaming_reasoning_effort(self):
"""
Base test for non-streaming reasoning effort
- Assert that `reasoning_content` is not None in the response message
- Assert that `reasoning_tokens` is greater than 0 in the usage
"""
litellm._turn_on_debug()
base_completion_call_args = self.get_base_completion_call_args()
response: ModelResponse = self.completion_function(**base_completion_call_args, reasoning_effort="low")
# user gets `reasoning_content` in the response message
assert response.choices[0].message.reasoning_content is not None
assert isinstance(response.choices[0].message.reasoning_content, str)
# user get `reasoning_tokens`
assert response.usage.completion_tokens_details.reasoning_tokens > 0
def test_streaming_reasoning_effort(self):
"""
Base test for streaming reasoning effort
- Assert that `reasoning_content` is not None in the streaming response
- Assert that `reasoning_tokens` is greater than 0 in the usage
"""
#litellm._turn_on_debug()
base_completion_call_args = self.get_base_completion_call_args()
response: CustomStreamWrapper = self.completion_function(
**base_completion_call_args,
reasoning_effort="low",
stream=True,
stream_options={
"include_usage": True
}
)
reasoning_content: str = ""
usage: Usage = None
for chunk in response:
print(chunk)
if hasattr(chunk.choices[0].delta, "reasoning_content"):
reasoning_content += chunk.choices[0].delta.reasoning_content
if hasattr(chunk, "usage"):
usage = chunk.usage
assert reasoning_content is not None
assert len(reasoning_content) > 0
print(f"usage: {usage}")
assert usage.completion_tokens_details.reasoning_tokens > 0

View file

@ -449,3 +449,19 @@ def test_litellm_gateway_from_sdk_with_response_cost_in_additional_headers():
)
assert response._hidden_params["response_cost"] == 120
def test_litellm_gateway_from_sdk_with_thinking_param():
try:
response = litellm.completion(
model="litellm_proxy/anthropic.claude-3-7-sonnet-20250219-v1:0",
messages=[{"role": "user", "content": "Hello world"}],
api_base="http://0.0.0.0:4000",
api_key="sk-PIp1h0RekR",
# client=openai_client,
thinking={"type": "enabled", "max_budget": 100},
)
pytest.fail("Expected an error to be raised")
except Exception as e:
assert "Connection error." in str(e)

View file

@ -1410,7 +1410,7 @@ def test_litellm_proxy_thinking_param():
custom_llm_provider="litellm_proxy",
thinking={"type": "enabled", "budget_tokens": 1024},
)
assert optional_params["thinking"] == {"type": "enabled", "budget_tokens": 1024}
assert optional_params["extra_body"]["thinking"] == {"type": "enabled", "budget_tokens": 1024}
def test_gemini_modalities_param():
optional_params = get_optional_params(

View file

@ -18,6 +18,7 @@ from litellm import Choices, Message, ModelResponse, EmbeddingResponse, Usage
from litellm import completion
from unittest.mock import patch
from litellm.llms.xai.chat.transformation import XAIChatConfig, XAI_API_BASE
from base_llm_unit_tests import BaseReasoningLLMTests
def test_xai_chat_config_get_openai_compatible_provider_info():
@ -160,3 +161,11 @@ def test_xai_message_name_filtering():
)
assert response is not None
assert response.choices[0].message.content is not None
class TestXAIReasoningEffort(BaseReasoningLLMTests):
def get_base_completion_call_args(self):
return {
"model": "xai/grok-3-mini-beta",
"messages": [{"role": "user", "content": "Hello"}],
}
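The call the base reasoning test makes can also be sketched directly. Assuming `XAI_API_KEY` is set in the environment, something like:

```python
import litellm

# a sketch of the request the reasoning tests above issue against grok-3-mini-beta
response = litellm.completion(
    model="xai/grok-3-mini-beta",
    messages=[{"role": "user", "content": "Hello"}],
    reasoning_effort="low",
)
print(response.choices[0].message.reasoning_content)
print(response.usage.completion_tokens_details.reasoning_tokens)
```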

View file

@ -1075,9 +1075,14 @@ def test_embedding_response_ratelimit_headers(model):
hidden_params = response._hidden_params
additional_headers = hidden_params.get("additional_headers", {})
print(additional_headers)
assert "x-ratelimit-remaining-requests" in additional_headers
assert int(additional_headers["x-ratelimit-remaining-requests"]) > 0
print("additional_headers", additional_headers)
# Azure is flaky about returning x-ratelimit-remaining-requests; first verify the upstream API actually returned a rate-limit header
# if the upstream API returns this header, verify that litellm transforms and forwards it
if "llm_provider-x-ratelimit-limit-requests" in additional_headers or "x-ratelimit-limit-requests" in additional_headers:
assert "x-ratelimit-remaining-requests" in additional_headers
assert int(additional_headers["x-ratelimit-remaining-requests"]) > 0
assert "x-ratelimit-remaining-tokens" in additional_headers
assert int(additional_headers["x-ratelimit-remaining-tokens"]) > 0

View file

@ -510,6 +510,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
"supports_video_input": {"type": "boolean"},
"supports_vision": {"type": "boolean"},
"supports_web_search": {"type": "boolean"},
"supports_reasoning": {"type": "boolean"},
"tool_use_system_prompt_tokens": {"type": "number"},
"tpm": {"type": "number"},
"supported_endpoints": {

View file

@ -9,7 +9,7 @@
"model": "gpt-4o",
"user": "",
"team_id": "",
"metadata": "{\"applied_guardrails\": [], \"batch_models\": null, \"mcp_tool_call_metadata\": null, \"usage_object\": {\"completion_tokens\": 20, \"prompt_tokens\": 10, \"total_tokens\": 30, \"completion_tokens_details\": null, \"prompt_tokens_details\": null}, \"model_map_information\": {\"model_map_key\": \"gpt-4o\", \"model_map_value\": {\"key\": \"gpt-4o\", \"max_tokens\": 16384, \"max_input_tokens\": 128000, \"max_output_tokens\": 16384, \"input_cost_per_token\": 2.5e-06, \"cache_creation_input_token_cost\": null, \"cache_read_input_token_cost\": 1.25e-06, \"input_cost_per_character\": null, \"input_cost_per_token_above_128k_tokens\": null, \"input_cost_per_token_above_200k_tokens\": null, \"input_cost_per_query\": null, \"input_cost_per_second\": null, \"input_cost_per_audio_token\": null, \"input_cost_per_token_batches\": 1.25e-06, \"output_cost_per_token_batches\": 5e-06, \"output_cost_per_token\": 1e-05, \"output_cost_per_audio_token\": null, \"output_cost_per_character\": null, \"output_cost_per_token_above_128k_tokens\": null, \"output_cost_per_character_above_128k_tokens\": null, \"output_cost_per_token_above_200k_tokens\": null, \"output_cost_per_second\": null, \"output_cost_per_image\": null, \"output_vector_size\": null, \"litellm_provider\": \"openai\", \"mode\": \"chat\", \"supports_system_messages\": true, \"supports_response_schema\": true, \"supports_vision\": true, \"supports_function_calling\": true, \"supports_tool_choice\": true, \"supports_assistant_prefill\": false, \"supports_prompt_caching\": true, \"supports_audio_input\": false, \"supports_audio_output\": false, \"supports_pdf_input\": false, \"supports_embedding_image_input\": false, \"supports_native_streaming\": null, \"supports_web_search\": true, \"search_context_cost_per_query\": {\"search_context_size_low\": 0.03, \"search_context_size_medium\": 0.035, \"search_context_size_high\": 0.05}, \"tpm\": null, \"rpm\": null, \"supported_openai_params\": [\"frequency_penalty\", \"logit_bias\", \"logprobs\", \"top_logprobs\", \"max_tokens\", \"max_completion_tokens\", \"modalities\", \"prediction\", \"n\", \"presence_penalty\", \"seed\", \"stop\", \"stream\", \"stream_options\", \"temperature\", \"top_p\", \"tools\", \"tool_choice\", \"function_call\", \"functions\", \"max_retries\", \"extra_headers\", \"parallel_tool_calls\", \"audio\", \"response_format\", \"user\"]}}, \"additional_usage_values\": {\"completion_tokens_details\": null, \"prompt_tokens_details\": null}}",
"metadata": "{\"applied_guardrails\": [], \"batch_models\": null, \"mcp_tool_call_metadata\": null, \"usage_object\": {\"completion_tokens\": 20, \"prompt_tokens\": 10, \"total_tokens\": 30, \"completion_tokens_details\": null, \"prompt_tokens_details\": null}, \"model_map_information\": {\"model_map_key\": \"gpt-4o\", \"model_map_value\": {\"key\": \"gpt-4o\", \"max_tokens\": 16384, \"max_input_tokens\": 128000, \"max_output_tokens\": 16384, \"input_cost_per_token\": 2.5e-06, \"cache_creation_input_token_cost\": null, \"cache_read_input_token_cost\": 1.25e-06, \"input_cost_per_character\": null, \"input_cost_per_token_above_128k_tokens\": null, \"input_cost_per_token_above_200k_tokens\": null, \"input_cost_per_query\": null, \"input_cost_per_second\": null, \"input_cost_per_audio_token\": null, \"input_cost_per_token_batches\": 1.25e-06, \"output_cost_per_token_batches\": 5e-06, \"output_cost_per_token\": 1e-05, \"output_cost_per_audio_token\": null, \"output_cost_per_character\": null, \"output_cost_per_token_above_128k_tokens\": null, \"output_cost_per_character_above_128k_tokens\": null, \"output_cost_per_token_above_200k_tokens\": null, \"output_cost_per_second\": null, \"output_cost_per_image\": null, \"output_vector_size\": null, \"litellm_provider\": \"openai\", \"mode\": \"chat\", \"supports_system_messages\": true, \"supports_response_schema\": true, \"supports_vision\": true, \"supports_function_calling\": true, \"supports_tool_choice\": true, \"supports_assistant_prefill\": false, \"supports_prompt_caching\": true, \"supports_audio_input\": false, \"supports_audio_output\": false, \"supports_pdf_input\": false, \"supports_embedding_image_input\": false, \"supports_native_streaming\": null, \"supports_web_search\": true, \"supports_reasoning\": false, \"search_context_cost_per_query\": {\"search_context_size_low\": 0.03, \"search_context_size_medium\": 0.035, \"search_context_size_high\": 0.05}, \"tpm\": null, \"rpm\": null, \"supported_openai_params\": [\"frequency_penalty\", \"logit_bias\", \"logprobs\", \"top_logprobs\", \"max_tokens\", \"max_completion_tokens\", \"modalities\", \"prediction\", \"n\", \"presence_penalty\", \"seed\", \"stop\", \"stream\", \"stream_options\", \"temperature\", \"top_p\", \"tools\", \"tool_choice\", \"function_call\", \"functions\", \"max_retries\", \"extra_headers\", \"parallel_tool_calls\", \"audio\", \"response_format\", \"user\"]}}, \"additional_usage_values\": {\"completion_tokens_details\": null, \"prompt_tokens_details\": null}}",
"cache_key": "Cache OFF",
"spend": 0.00022500000000000002,
"total_tokens": 30,

View file

@ -22,7 +22,7 @@
"jwt-decode": "^4.0.0",
"moment": "^2.30.1",
"next": "^14.2.26",
"openai": "^4.28.0",
"openai": "^4.93.0",
"papaparse": "^5.5.2",
"react": "^18",
"react-copy-to-clipboard": "^5.1.0",
@ -1617,11 +1617,6 @@
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
},
"node_modules/base-64": {
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/base-64/-/base-64-0.1.0.tgz",
"integrity": "sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA=="
},
"node_modules/binary-extensions": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz",
@ -1810,14 +1805,6 @@
"url": "https://github.com/sponsors/wooorm"
}
},
"node_modules/charenc": {
"version": "0.0.2",
"resolved": "https://registry.npmjs.org/charenc/-/charenc-0.0.2.tgz",
"integrity": "sha512-yrLQ/yVUFXkzg7EDQsPieE/53+0RlaWTs+wBrvW36cyilJ2SaDWfl4Yj7MtLTXleV9uEKefbAGUPv2/iWSooRA==",
"engines": {
"node": "*"
}
},
"node_modules/chokidar": {
"version": "3.5.3",
"resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.3.tgz",
@ -1949,14 +1936,6 @@
"node": ">= 8"
}
},
"node_modules/crypt": {
"version": "0.0.2",
"resolved": "https://registry.npmjs.org/crypt/-/crypt-0.0.2.tgz",
"integrity": "sha512-mCxBlsHFYh9C+HVpiEacem8FEBnMXgU9gy4zmNC+SXAZNB/1idgp/aulFJ4FgCi7GPEVbfyng092GqL2k2rmow==",
"engines": {
"node": "*"
}
},
"node_modules/cssesc": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz",
@ -2212,15 +2191,6 @@
"resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz",
"integrity": "sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw=="
},
"node_modules/digest-fetch": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/digest-fetch/-/digest-fetch-1.3.0.tgz",
"integrity": "sha512-CGJuv6iKNM7QyZlM2T3sPAdZWd/p9zQiRNS9G+9COUCwzWFTs0Xp8NF5iePx7wtvhDykReiRRrSeNb4oMmB8lA==",
"dependencies": {
"base-64": "^0.1.0",
"md5": "^2.3.0"
}
},
"node_modules/dir-glob": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz",
@ -3694,11 +3664,6 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/is-buffer": {
"version": "1.1.6",
"resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz",
"integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w=="
},
"node_modules/is-callable": {
"version": "1.2.7",
"resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.2.7.tgz",
@ -4311,16 +4276,6 @@
"node": "14 || >=16.14"
}
},
"node_modules/md5": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/md5/-/md5-2.3.0.tgz",
"integrity": "sha512-T1GITYmFaKuO91vxyoQMFETst+O71VUPEU3ze5GNzDm0OWdP8v1ziTaAEPUr/3kLsY3Sftgz242A1SetQiDL7g==",
"dependencies": {
"charenc": "0.0.2",
"crypt": "0.0.2",
"is-buffer": "~1.1.6"
}
},
"node_modules/mdast-util-from-markdown": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/mdast-util-from-markdown/-/mdast-util-from-markdown-2.0.0.tgz",
@ -5282,22 +5237,32 @@
}
},
"node_modules/openai": {
"version": "4.28.0",
"resolved": "https://registry.npmjs.org/openai/-/openai-4.28.0.tgz",
"integrity": "sha512-JM8fhcpmpGN0vrUwGquYIzdcEQHtFuom6sRCbbCM6CfzZXNuRk33G7KfeRAIfnaCxSpzrP5iHtwJzIm6biUZ2Q==",
"version": "4.93.0",
"resolved": "https://registry.npmjs.org/openai/-/openai-4.93.0.tgz",
"integrity": "sha512-2kONcISbThKLfm7T9paVzg+QCE1FOZtNMMUfXyXckUAoXRRS/mTP89JSDHPMp8uM5s0bz28RISbvQjArD6mgUQ==",
"dependencies": {
"@types/node": "^18.11.18",
"@types/node-fetch": "^2.6.4",
"abort-controller": "^3.0.0",
"agentkeepalive": "^4.2.1",
"digest-fetch": "^1.3.0",
"form-data-encoder": "1.7.2",
"formdata-node": "^4.3.2",
"node-fetch": "^2.6.7",
"web-streams-polyfill": "^3.2.1"
"node-fetch": "^2.6.7"
},
"bin": {
"openai": "bin/cli"
},
"peerDependencies": {
"ws": "^8.18.0",
"zod": "^3.23.8"
},
"peerDependenciesMeta": {
"ws": {
"optional": true
},
"zod": {
"optional": true
}
}
},
"node_modules/openai/node_modules/@types/node": {
@ -7701,14 +7666,6 @@
"d3-timer": "^3.0.1"
}
},
"node_modules/web-streams-polyfill": {
"version": "3.3.2",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.2.tgz",
"integrity": "sha512-3pRGuxRF5gpuZc0W+EpwQRmCD7gRqcDOMt688KmdlDAgAyaB1XlN0zq2njfDNm44XVdIouE7pZ6GzbdyH47uIQ==",
"engines": {
"node": ">= 8"
}
},
"node_modules/webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",

View file

@ -23,7 +23,7 @@
"jwt-decode": "^4.0.0",
"moment": "^2.30.1",
"next": "^14.2.26",
"openai": "^4.28.0",
"openai": "^4.93.0",
"papaparse": "^5.5.2",
"react": "^18",
"react-copy-to-clipboard": "^5.1.0",

View file

@ -0,0 +1,307 @@
import React, { useState, useEffect } from "react";
import { Card, Title, Text, Divider, Button, TextInput } from "@tremor/react";
import { Typography, Spin, message, Switch, Select, Form } from "antd";
import { getDefaultTeamSettings, updateDefaultTeamSettings, modelAvailableCall } from "./networking";
import BudgetDurationDropdown, { getBudgetDurationLabel } from "./common_components/budget_duration_dropdown";
import { getModelDisplayName } from "./key_team_helpers/fetch_available_models_team_key";
interface TeamSSOSettingsProps {
accessToken: string | null;
userID: string;
userRole: string;
}
const TeamSSOSettings: React.FC<TeamSSOSettingsProps> = ({ accessToken, userID, userRole }) => {
const [loading, setLoading] = useState<boolean>(true);
const [settings, setSettings] = useState<any>(null);
const [isEditing, setIsEditing] = useState<boolean>(false);
const [editedValues, setEditedValues] = useState<any>({});
const [saving, setSaving] = useState<boolean>(false);
const [availableModels, setAvailableModels] = useState<string[]>([]);
const { Paragraph } = Typography;
const { Option } = Select;
useEffect(() => {
const fetchTeamSSOSettings = async () => {
if (!accessToken) {
setLoading(false);
return;
}
try {
const data = await getDefaultTeamSettings(accessToken);
setSettings(data);
setEditedValues(data.values || {});
// Fetch available models
if (accessToken) {
try {
const modelResponse = await modelAvailableCall(accessToken, userID, userRole);
if (modelResponse && modelResponse.data) {
const modelNames = modelResponse.data.map((model: { id: string }) => model.id);
setAvailableModels(modelNames);
}
} catch (error) {
console.error("Error fetching available models:", error);
}
}
} catch (error) {
console.error("Error fetching team SSO settings:", error);
message.error("Failed to fetch team settings");
} finally {
setLoading(false);
}
};
fetchTeamSSOSettings();
}, [accessToken]);
const handleSaveSettings = async () => {
if (!accessToken) return;
setSaving(true);
try {
const updatedSettings = await updateDefaultTeamSettings(accessToken, editedValues);
setSettings({...settings, values: updatedSettings.settings});
setIsEditing(false);
message.success("Default team settings updated successfully");
} catch (error) {
console.error("Error updating team settings:", error);
message.error("Failed to update team settings");
} finally {
setSaving(false);
}
};
const handleTextInputChange = (key: string, value: any) => {
setEditedValues((prev: Record<string, any>) => ({
...prev,
[key]: value
}));
};
const renderEditableField = (key: string, property: any, value: any) => {
const type = property.type;
if (key === "budget_duration") {
return (
<BudgetDurationDropdown
value={editedValues[key] || null}
onChange={(value) => handleTextInputChange(key, value)}
className="mt-2"
/>
);
} else if (type === "boolean") {
return (
<div className="mt-2">
<Switch
checked={!!editedValues[key]}
onChange={(checked) => handleTextInputChange(key, checked)}
/>
</div>
);
} else if (type === "array" && property.items?.enum) {
return (
<Select
mode="multiple"
style={{ width: '100%' }}
value={editedValues[key] || []}
onChange={(value) => handleTextInputChange(key, value)}
className="mt-2"
>
{property.items.enum.map((option: string) => (
<Option key={option} value={option}>{option}</Option>
))}
</Select>
);
} else if (key === "models") {
return (
<Select
mode="multiple"
style={{ width: '100%' }}
value={editedValues[key] || []}
onChange={(value) => handleTextInputChange(key, value)}
className="mt-2"
>
{availableModels.map((model: string) => (
<Option key={model} value={model}>
{getModelDisplayName(model)}
</Option>
))}
</Select>
);
} else if (type === "string" && property.enum) {
return (
<Select
style={{ width: '100%' }}
value={editedValues[key] || ""}
onChange={(value) => handleTextInputChange(key, value)}
className="mt-2"
>
{property.enum.map((option: string) => (
<Option key={option} value={option}>{option}</Option>
))}
</Select>
);
} else {
return (
<TextInput
value={editedValues[key] !== undefined ? String(editedValues[key]) : ""}
onChange={(e) => handleTextInputChange(key, e.target.value)}
placeholder={property.description || ""}
className="mt-2"
/>
);
}
};
const renderValue = (key: string, value: any): JSX.Element => {
if (value === null || value === undefined) return <span className="text-gray-400">Not set</span>;
if (key === "budget_duration") {
return <span>{getBudgetDurationLabel(value)}</span>;
}
if (typeof value === "boolean") {
return <span>{value ? "Enabled" : "Disabled"}</span>;
}
if (key === "models" && Array.isArray(value)) {
if (value.length === 0) return <span className="text-gray-400">None</span>;
return (
<div className="flex flex-wrap gap-2 mt-1">
{value.map((model, index) => (
<span key={index} className="px-2 py-1 bg-blue-100 rounded text-xs">
{getModelDisplayName(model)}
</span>
))}
</div>
);
}
if (typeof value === "object") {
if (Array.isArray(value)) {
if (value.length === 0) return <span className="text-gray-400">None</span>;
return (
<div className="flex flex-wrap gap-2 mt-1">
{value.map((item, index) => (
<span key={index} className="px-2 py-1 bg-blue-100 rounded text-xs">
{typeof item === "object" ? JSON.stringify(item) : String(item)}
</span>
))}
</div>
);
}
return (
<pre className="bg-gray-100 p-2 rounded text-xs overflow-auto mt-1">
{JSON.stringify(value, null, 2)}
</pre>
);
}
return <span>{String(value)}</span>;
};
if (loading) {
return (
<div className="flex justify-center items-center h-64">
<Spin size="large" />
</div>
);
}
if (!settings) {
return (
<Card>
<Text>No team settings available or you do not have permission to view them.</Text>
</Card>
);
}
// Dynamically render settings based on the schema
const renderSettings = () => {
const { values, schema } = settings;
if (!schema || !schema.properties) {
return <Text>No schema information available</Text>;
}
return Object.entries(schema.properties).map(([key, property]: [string, any]) => {
const value = values[key];
const displayName = key.replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase());
return (
<div key={key} className="mb-6 pb-6 border-b border-gray-200 last:border-0">
<Text className="font-medium text-lg">{displayName}</Text>
<Paragraph className="text-sm text-gray-500 mt-1">
{property.description || "No description available"}
</Paragraph>
{isEditing ? (
<div className="mt-2">
{renderEditableField(key, property, value)}
</div>
) : (
<div className="mt-1 p-2 bg-gray-50 rounded">
{renderValue(key, value)}
</div>
)}
</div>
);
});
};
return (
<Card>
<div className="flex justify-between items-center mb-4">
<Title className="text-xl">Default Team Settings</Title>
{!loading && settings && (
isEditing ? (
<div className="flex gap-2">
<Button
variant="secondary"
onClick={() => {
setIsEditing(false);
setEditedValues(settings.values || {});
}}
disabled={saving}
>
Cancel
</Button>
<Button
onClick={handleSaveSettings}
loading={saving}
>
Save Changes
</Button>
</div>
) : (
<Button
onClick={() => setIsEditing(true)}
>
Edit Settings
</Button>
)
)}
</div>
<Text>
These settings will be applied by default when creating new teams.
</Text>
{settings?.schema?.description && (
<Paragraph className="mb-4 mt-2">{settings.schema.description}</Paragraph>
)}
<Divider />
<div className="mt-4 space-y-4">
{renderSettings()}
</div>
</Card>
);
};
export default TeamSSOSettings;

View file

@ -23,7 +23,7 @@ import {
Divider,
} from "@tremor/react";
import { message, Select, Spin, Typography, Tooltip } from "antd";
import { message, Select, Spin, Typography, Tooltip, Input } from "antd";
import { makeOpenAIChatCompletionRequest } from "./chat_ui/llm_calls/chat_completion";
import { makeOpenAIImageGenerationRequest } from "./chat_ui/llm_calls/image_generation";
import { fetchAvailableModels, ModelGroup } from "./chat_ui/llm_calls/fetch_models";
@ -33,6 +33,9 @@ import { coy } from 'react-syntax-highlighter/dist/esm/styles/prism';
import EndpointSelector from "./chat_ui/EndpointSelector";
import TagSelector from "./tag_management/TagSelector";
import { determineEndpointType } from "./chat_ui/EndpointUtils";
import { MessageType } from "./chat_ui/types";
import ReasoningContent from "./chat_ui/ReasoningContent";
import ResponseMetrics, { TokenUsage } from "./chat_ui/ResponseMetrics";
import {
SendOutlined,
ApiOutlined,
@ -45,6 +48,8 @@ import {
TagsOutlined
} from "@ant-design/icons";
const { TextArea } = Input;
interface ChatUIProps {
accessToken: string | null;
token: string | null;
@ -65,7 +70,7 @@ const ChatUI: React.FC<ChatUIProps> = ({
);
const [apiKey, setApiKey] = useState("");
const [inputMessage, setInputMessage] = useState("");
const [chatHistory, setChatHistory] = useState<{ role: string; content: string; model?: string; isImage?: boolean }[]>([]);
const [chatHistory, setChatHistory] = useState<MessageType[]>([]);
const [selectedModel, setSelectedModel] = useState<string | undefined>(
undefined
);
@ -138,7 +143,11 @@ const ChatUI: React.FC<ChatUIProps> = ({
if (lastMessage && lastMessage.role === role && !lastMessage.isImage) {
return [
...prevHistory.slice(0, prevHistory.length - 1),
{ role, content: lastMessage.content + chunk, model },
{
...lastMessage,
content: lastMessage.content + chunk,
model
},
];
} else {
return [...prevHistory, { role, content: chunk, model }];
@ -146,6 +155,97 @@ const ChatUI: React.FC<ChatUIProps> = ({
});
};
const updateReasoningContent = (chunk: string) => {
setChatHistory((prevHistory) => {
const lastMessage = prevHistory[prevHistory.length - 1];
if (lastMessage && lastMessage.role === "assistant" && !lastMessage.isImage) {
return [
...prevHistory.slice(0, prevHistory.length - 1),
{
...lastMessage,
reasoningContent: (lastMessage.reasoningContent || "") + chunk
},
];
} else {
// If there's no assistant message yet, we'll create one with empty content
// but with reasoning content
if (prevHistory.length > 0 && prevHistory[prevHistory.length - 1].role === "user") {
return [
...prevHistory,
{
role: "assistant",
content: "",
reasoningContent: chunk
}
];
}
return prevHistory;
}
});
};
const updateTimingData = (timeToFirstToken: number) => {
console.log("updateTimingData called with:", timeToFirstToken);
setChatHistory((prevHistory) => {
const lastMessage = prevHistory[prevHistory.length - 1];
console.log("Current last message:", lastMessage);
if (lastMessage && lastMessage.role === "assistant") {
console.log("Updating assistant message with timeToFirstToken:", timeToFirstToken);
const updatedHistory = [
...prevHistory.slice(0, prevHistory.length - 1),
{
...lastMessage,
timeToFirstToken
},
];
console.log("Updated chat history:", updatedHistory);
return updatedHistory;
}
// If the last message is a user message and no assistant message exists yet,
// create a new assistant message with empty content
else if (lastMessage && lastMessage.role === "user") {
console.log("Creating new assistant message with timeToFirstToken:", timeToFirstToken);
return [
...prevHistory,
{
role: "assistant",
content: "",
timeToFirstToken
}
];
}
console.log("No appropriate message found to update timing");
return prevHistory;
});
};
const updateUsageData = (usage: TokenUsage) => {
console.log("Received usage data:", usage);
setChatHistory((prevHistory) => {
const lastMessage = prevHistory[prevHistory.length - 1];
if (lastMessage && lastMessage.role === "assistant") {
console.log("Updating message with usage data:", usage);
const updatedMessage = {
...lastMessage,
usage
};
console.log("Updated message:", updatedMessage);
return [
...prevHistory.slice(0, prevHistory.length - 1),
updatedMessage
];
}
return prevHistory;
});
};
const updateImageUI = (imageUrl: string, model: string) => {
setChatHistory((prevHistory) => [
...prevHistory,
@ -153,10 +253,12 @@ const ChatUI: React.FC<ChatUIProps> = ({
]);
};
const handleKeyDown = (event: React.KeyboardEvent<HTMLInputElement>) => {
if (event.key === 'Enter') {
const handleKeyDown = (event: React.KeyboardEvent<HTMLTextAreaElement>) => {
if (event.key === 'Enter' && !event.shiftKey) {
event.preventDefault(); // Prevent default to avoid newline
handleSendMessage();
}
// If Shift+Enter is pressed, the default behavior (inserting a newline) will occur
};
const handleCancelRequest = () => {
@ -206,7 +308,10 @@ const ChatUI: React.FC<ChatUIProps> = ({
selectedModel,
effectiveApiKey,
selectedTags,
signal
signal,
updateReasoningContent,
updateTimingData,
updateUsageData
);
} else if (endpointType === EndpointType.IMAGE) {
// For image generation
@ -410,7 +515,16 @@ const ChatUI: React.FC<ChatUIProps> = ({
</span>
)}
</div>
<div className="whitespace-pre-wrap break-words max-w-full message-content">
{message.reasoningContent && (
<ReasoningContent reasoningContent={message.reasoningContent} />
)}
<div className="whitespace-pre-wrap break-words max-w-full message-content"
style={{
wordWrap: 'break-word',
overflowWrap: 'break-word',
wordBreak: 'break-word',
hyphens: 'auto'
}}>
{message.isImage ? (
<img
src={message.content}
@ -432,21 +546,33 @@ const ChatUI: React.FC<ChatUIProps> = ({
language={match[1]}
PreTag="div"
className="rounded-md my-2"
wrapLines={true}
wrapLongLines={true}
{...props}
>
{String(children).replace(/\n$/, '')}
</SyntaxHighlighter>
) : (
<code className={`${className} px-1.5 py-0.5 rounded bg-gray-100 text-sm font-mono`} {...props}>
<code className={`${className} px-1.5 py-0.5 rounded bg-gray-100 text-sm font-mono`} style={{ wordBreak: 'break-word' }} {...props}>
{children}
</code>
);
}
},
pre: ({ node, ...props }) => (
<pre style={{ overflowX: 'auto', maxWidth: '100%' }} {...props} />
)
}}
>
{message.content}
</ReactMarkdown>
)}
{message.role === "assistant" && (message.timeToFirstToken || message.usage) && (
<ResponseMetrics
timeToFirstToken={message.timeToFirstToken}
usage={message.usage}
/>
)}
</div>
</div>
</div>
@ -461,18 +587,19 @@ const ChatUI: React.FC<ChatUIProps> = ({
<div className="p-4 border-t border-gray-200 bg-white">
<div className="flex items-center">
<TextInput
type="text"
<TextArea
value={inputMessage}
onChange={(e) => setInputMessage(e.target.value)}
onKeyDown={handleKeyDown}
placeholder={
endpointType === EndpointType.CHAT
? "Type your message..."
? "Type your message... (Shift+Enter for new line)"
: "Describe the image you want to generate..."
}
disabled={isLoading}
className="flex-1"
autoSize={{ minRows: 1, maxRows: 6 }}
style={{ resize: 'none', paddingRight: '10px', paddingLeft: '10px' }}
/>
{isLoading ? (
<Button

View file

@ -0,0 +1,64 @@
import React, { useState } from "react";
import { Button, Collapse } from "antd";
import ReactMarkdown from "react-markdown";
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
import { coy } from 'react-syntax-highlighter/dist/esm/styles/prism';
import { DownOutlined, RightOutlined, BulbOutlined } from "@ant-design/icons";
interface ReasoningContentProps {
reasoningContent: string;
}
const ReasoningContent: React.FC<ReasoningContentProps> = ({ reasoningContent }) => {
const [isExpanded, setIsExpanded] = useState(true);
if (!reasoningContent) return null;
return (
<div className="reasoning-content mt-1 mb-2">
<Button
type="text"
className="flex items-center text-xs text-gray-500 hover:text-gray-700"
onClick={() => setIsExpanded(!isExpanded)}
icon={<BulbOutlined />}
>
{isExpanded ? "Hide reasoning" : "Show reasoning"}
{isExpanded ? <DownOutlined className="ml-1" /> : <RightOutlined className="ml-1" />}
</Button>
{isExpanded && (
<div className="mt-2 p-3 bg-gray-50 border border-gray-200 rounded-md text-sm text-gray-700">
<ReactMarkdown
components={{
code({node, inline, className, children, ...props}: React.ComponentPropsWithoutRef<'code'> & {
inline?: boolean;
node?: any;
}) {
const match = /language-(\w+)/.exec(className || '');
return !inline && match ? (
<SyntaxHighlighter
style={coy as any}
language={match[1]}
PreTag="div"
className="rounded-md my-2"
{...props}
>
{String(children).replace(/\n$/, '')}
</SyntaxHighlighter>
) : (
<code className={`${className} px-1.5 py-0.5 rounded bg-gray-100 text-sm font-mono`} {...props}>
{children}
</code>
);
}
}}
>
{reasoningContent}
</ReactMarkdown>
</div>
)}
</div>
);
};
export default ReasoningContent;

View file

@ -0,0 +1,80 @@
import React from "react";
import { Tooltip } from "antd";
import {
ClockCircleOutlined,
NumberOutlined,
ImportOutlined,
ExportOutlined,
ThunderboltOutlined,
BulbOutlined
} from "@ant-design/icons";
export interface TokenUsage {
completionTokens?: number;
promptTokens?: number;
totalTokens?: number;
reasoningTokens?: number;
}
interface ResponseMetricsProps {
timeToFirstToken?: number; // in milliseconds
usage?: TokenUsage;
}
const ResponseMetrics: React.FC<ResponseMetricsProps> = ({
timeToFirstToken,
usage
}) => {
if (!timeToFirstToken && !usage) return null;
return (
<div className="response-metrics mt-2 pt-2 border-t border-gray-100 text-xs text-gray-500 flex flex-wrap gap-3">
{timeToFirstToken !== undefined && (
<Tooltip title="Time to first token">
<div className="flex items-center">
<ClockCircleOutlined className="mr-1" />
<span>{(timeToFirstToken / 1000).toFixed(2)}s</span>
</div>
</Tooltip>
)}
{usage?.promptTokens !== undefined && (
<Tooltip title="Prompt tokens">
<div className="flex items-center">
<ImportOutlined className="mr-1" />
<span>In: {usage.promptTokens}</span>
</div>
</Tooltip>
)}
{usage?.completionTokens !== undefined && (
<Tooltip title="Completion tokens">
<div className="flex items-center">
<ExportOutlined className="mr-1" />
<span>Out: {usage.completionTokens}</span>
</div>
</Tooltip>
)}
{usage?.reasoningTokens !== undefined && (
<Tooltip title="Reasoning tokens">
<div className="flex items-center">
<BulbOutlined className="mr-1" />
<span>Reasoning: {usage.reasoningTokens}</span>
</div>
</Tooltip>
)}
{usage?.totalTokens !== undefined && (
<Tooltip title="Total tokens">
<div className="flex items-center">
<NumberOutlined className="mr-1" />
<span>Total: {usage.totalTokens}</span>
</div>
</Tooltip>
)}
</div>
);
};
export default ResponseMetrics;
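For context, a minimal usage sketch of this metrics footer (not part of the diff); the import path and all numbers are placeholders. `timeToFirstToken` is in milliseconds and renders as seconds:

```tsx
import ResponseMetrics from "./ResponseMetrics"; // path assumed

// Renders "1.25s", "In: 42", "Out: 128", "Reasoning: 64", "Total: 170" chips with tooltips.
const ExampleFooter = () => (
  <ResponseMetrics
    timeToFirstToken={1250}
    usage={{ promptTokens: 42, completionTokens: 128, reasoningTokens: 64, totalTokens: 170 }}
  />
);

export default ExampleFooter;
```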

View file

@ -1,14 +1,18 @@
import openai from "openai";
import { ChatCompletionMessageParam } from "openai/resources/chat/completions";
import { message } from "antd";
import { TokenUsage } from "../ResponseMetrics";
export async function makeOpenAIChatCompletionRequest(
chatHistory: { role: string; content: string }[],
updateUI: (chunk: string, model: string) => void,
updateUI: (chunk: string, model?: string) => void,
selectedModel: string,
accessToken: string,
tags?: string[],
signal?: AbortSignal
signal?: AbortSignal,
onReasoningContent?: (content: string) => void,
onTimingData?: (timeToFirstToken: number) => void,
onUsageData?: (usage: TokenUsage) => void
) {
// base url should be the current base_url
const isLocal = process.env.NODE_ENV === "development";
@ -20,23 +24,85 @@ export async function makeOpenAIChatCompletionRequest(
? "http://localhost:4000"
: window.location.origin;
const client = new openai.OpenAI({
apiKey: accessToken, // Replace with your OpenAI API key
baseURL: proxyBaseUrl, // Replace with your OpenAI API base URL
dangerouslyAllowBrowser: true, // using a temporary litellm proxy key
apiKey: accessToken,
baseURL: proxyBaseUrl,
dangerouslyAllowBrowser: true,
defaultHeaders: tags && tags.length > 0 ? { 'x-litellm-tags': tags.join(',') } : undefined,
});
try {
const startTime = Date.now();
let firstTokenReceived = false;
let timeToFirstToken: number | undefined = undefined;
// For collecting complete response text
let fullResponseContent = "";
let fullReasoningContent = "";
const response = await client.chat.completions.create({
model: selectedModel,
stream: true,
stream_options: {
include_usage: true,
},
messages: chatHistory as ChatCompletionMessageParam[],
}, { signal });
for await (const chunk of response) {
console.log(chunk);
if (chunk.choices[0].delta.content) {
updateUI(chunk.choices[0].delta.content, chunk.model);
console.log("Stream chunk:", chunk);
// Process content and measure time to first token
const delta = chunk.choices[0]?.delta as any;
// Debug what's in the delta
console.log("Delta content:", chunk.choices[0]?.delta?.content);
console.log("Delta reasoning content:", delta?.reasoning_content);
// Measure time to first token for either content or reasoning_content
if (!firstTokenReceived && (chunk.choices[0]?.delta?.content || (delta && delta.reasoning_content))) {
firstTokenReceived = true;
timeToFirstToken = Date.now() - startTime;
console.log("First token received! Time:", timeToFirstToken, "ms");
if (onTimingData) {
console.log("Calling onTimingData with:", timeToFirstToken);
onTimingData(timeToFirstToken);
} else {
console.log("onTimingData callback is not defined!");
}
}
// Process content
if (chunk.choices[0]?.delta?.content) {
const content = chunk.choices[0].delta.content;
updateUI(content, chunk.model);
fullResponseContent += content;
}
// Process reasoning content if present - using type assertion
if (delta && delta.reasoning_content) {
const reasoningContent = delta.reasoning_content;
if (onReasoningContent) {
onReasoningContent(reasoningContent);
}
fullReasoningContent += reasoningContent;
}
// Check for usage data using type assertion
const chunkWithUsage = chunk as any;
if (chunkWithUsage.usage && onUsageData) {
console.log("Usage data found:", chunkWithUsage.usage);
const usageData: TokenUsage = {
completionTokens: chunkWithUsage.usage.completion_tokens,
promptTokens: chunkWithUsage.usage.prompt_tokens,
totalTokens: chunkWithUsage.usage.total_tokens,
};
// Check for reasoning tokens
if (chunkWithUsage.usage.completion_tokens_details?.reasoning_tokens) {
usageData.reasoningTokens = chunkWithUsage.usage.completion_tokens_details.reasoning_tokens;
}
onUsageData(usageData);
}
}
} catch (error) {

View file

@ -0,0 +1,96 @@
import { TokenUsage } from "../ResponseMetrics";
export interface StreamingResponse {
id: string;
created: number;
model: string;
object: string;
system_fingerprint?: string;
choices: StreamingChoices[];
provider_specific_fields?: any;
stream_options?: any;
citations?: any;
usage?: Usage;
}
export interface StreamingChoices {
finish_reason?: string | null;
index: number;
delta: Delta;
logprobs?: any;
}
export interface Delta {
content?: string;
reasoning_content?: string;
role?: string;
function_call?: any;
tool_calls?: any;
audio?: any;
refusal?: any;
provider_specific_fields?: any;
}
export interface Usage {
completion_tokens: number;
prompt_tokens: number;
total_tokens: number;
completion_tokens_details?: {
accepted_prediction_tokens?: number;
audio_tokens?: number;
reasoning_tokens?: number;
rejected_prediction_tokens?: number;
text_tokens?: number | null;
};
prompt_tokens_details?: {
audio_tokens?: number;
cached_tokens?: number;
text_tokens?: number;
image_tokens?: number;
};
}
export interface StreamProcessCallbacks {
onContent: (content: string, model?: string) => void;
onReasoningContent: (content: string) => void;
onUsage?: (usage: TokenUsage) => void;
}
export const processStreamingResponse = (
response: StreamingResponse,
callbacks: StreamProcessCallbacks
) => {
// Extract model information if available
const model = response.model;
// Process regular content
if (response.choices && response.choices.length > 0) {
const choice = response.choices[0];
if (choice.delta?.content) {
callbacks.onContent(choice.delta.content, model);
}
// Process reasoning content if it exists
if (choice.delta?.reasoning_content) {
callbacks.onReasoningContent(choice.delta.reasoning_content);
}
}
// Process usage information if it exists and we have a handler
if (response.usage && callbacks.onUsage) {
console.log("Processing usage data:", response.usage);
const usageData: TokenUsage = {
completionTokens: response.usage.completion_tokens,
promptTokens: response.usage.prompt_tokens,
totalTokens: response.usage.total_tokens,
};
// Extract reasoning tokens if available
if (response.usage.completion_tokens_details?.reasoning_tokens) {
usageData.reasoningTokens = response.usage.completion_tokens_details.reasoning_tokens;
}
callbacks.onUsage(usageData);
}
};
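A minimal sketch of driving this processor by hand (not part of the diff); the import path, model name, and token counts are placeholders. The second chunk mirrors the usage-only chunk sent when `stream_options.include_usage` is enabled:

```typescript
import { processStreamingResponse, StreamingResponse } from "./process_stream"; // path assumed

// One content delta, then a final usage-only chunk with empty choices.
const contentChunk: StreamingResponse = {
  id: "chatcmpl-example",
  created: 1700000000,
  model: "gpt-4o",
  object: "chat.completion.chunk",
  choices: [{ index: 0, delta: { role: "assistant", content: "Hello" } }],
};

const usageChunk: StreamingResponse = {
  id: "chatcmpl-example",
  created: 1700000000,
  model: "gpt-4o",
  object: "chat.completion.chunk",
  choices: [],
  usage: { prompt_tokens: 12, completion_tokens: 5, total_tokens: 17 },
};

for (const chunk of [contentChunk, usageChunk]) {
  processStreamingResponse(chunk, {
    onContent: (text, model) => console.log("content:", text, "model:", model),
    onReasoningContent: (text) => console.log("reasoning:", text),
    onUsage: (usage) => console.log("usage:", usage),
  });
}
```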

View file

@ -0,0 +1,68 @@
export interface Delta {
content?: string;
reasoning_content?: string;
role?: string;
function_call?: any;
tool_calls?: any;
audio?: any;
refusal?: any;
provider_specific_fields?: any;
}
export interface CompletionTokensDetails {
accepted_prediction_tokens?: number;
audio_tokens?: number;
reasoning_tokens?: number;
rejected_prediction_tokens?: number;
text_tokens?: number | null;
}
export interface PromptTokensDetails {
audio_tokens?: number;
cached_tokens?: number;
text_tokens?: number;
image_tokens?: number;
}
export interface Usage {
completion_tokens: number;
prompt_tokens: number;
total_tokens: number;
completion_tokens_details?: CompletionTokensDetails;
prompt_tokens_details?: PromptTokensDetails;
}
export interface StreamingChoices {
finish_reason?: string | null;
index: number;
delta: Delta;
logprobs?: any;
}
export interface StreamingResponse {
id: string;
created: number;
model: string;
object: string;
system_fingerprint?: string;
choices: StreamingChoices[];
provider_specific_fields?: any;
stream_options?: any;
citations?: any;
usage?: Usage;
}
export interface MessageType {
role: string;
content: string;
model?: string;
isImage?: boolean;
reasoningContent?: string;
timeToFirstToken?: number;
usage?: {
completionTokens?: number;
promptTokens?: number;
totalTokens?: number;
reasoningTokens?: number;
};
}
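For illustration, a `MessageType` value as the chat UI assembles it once a stream completes (not part of the diff); the model name, text, and numbers are made up:

```typescript
import { MessageType } from "./types"; // path assumed

// An assistant turn carrying reasoning, first-token latency (ms), and usage,
// as consumed by ReasoningContent and ResponseMetrics.
const exampleAssistantMessage: MessageType = {
  role: "assistant",
  content: "The capital of France is Paris.",
  model: "gpt-4o",
  reasoningContent: "The user asks for a capital; France's capital is Paris.",
  timeToFirstToken: 840,
  usage: { promptTokens: 15, completionTokens: 9, reasoningTokens: 4, totalTokens: 24 },
};
```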

View file

@ -1023,6 +1023,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
is_proxy_admin={userRole === "Proxy Admin"}
userModels={all_models_on_proxy}
editTeam={false}
onUpdate={handleRefreshClick}
/>
</div>
);

View file

@ -4344,4 +4344,146 @@ export const tagDeleteCall = async (
console.error("Error deleting tag:", error);
throw error;
}
};
export const getDefaultTeamSettings = async (accessToken: string) => {
try {
// Construct base URL
let url = proxyBaseUrl
? `${proxyBaseUrl}/get/default_team_settings`
: `/get/default_team_settings`;
console.log("Fetching default team settings from:", url);
const response = await fetch(url, {
method: "GET",
headers: {
[globalLitellmHeaderName]: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
const errorData = await response.text();
handleError(errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log("Fetched default team settings:", data);
return data;
} catch (error) {
console.error("Failed to fetch default team settings:", error);
throw error;
}
};
export const updateDefaultTeamSettings = async (accessToken: string, settings: Record<string, any>) => {
try {
// Construct base URL
let url = proxyBaseUrl
? `${proxyBaseUrl}/update/default_team_settings`
: `/update/default_team_settings`;
console.log("Updating default team settings:", settings);
const response = await fetch(url, {
method: "PATCH",
headers: {
[globalLitellmHeaderName]: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
body: JSON.stringify(settings),
});
if (!response.ok) {
const errorData = await response.text();
handleError(errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log("Updated default team settings:", data);
message.success("Default team settings updated successfully");
return data;
} catch (error) {
console.error("Failed to update default team settings:", error);
throw error;
}
};
export const getTeamPermissionsCall = async (
accessToken: string,
teamId: string
) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/team/permissions_list?team_id=${teamId}`
: `/team/permissions_list?team_id=${teamId}`;
const response = await fetch(url, {
method: "GET",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${accessToken}`,
},
});
if (!response.ok) {
const errorData = await response.text();
handleError(errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log("Team permissions response:", data);
return data;
} catch (error) {
console.error("Failed to get team permissions:", error);
throw error;
}
};
export const teamPermissionsUpdateCall = async (
accessToken: string,
teamId: string,
permissions: string[]
) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/team/permissions_update`
: `/team/permissions_update`;
const response = await fetch(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${accessToken}`,
},
body: JSON.stringify({
team_id: teamId,
team_member_permissions: permissions,
}),
});
if (!response.ok) {
const errorData = await response.text();
handleError(errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log("Team permissions response:", data);
return data;
} catch (error) {
console.error("Failed to update team permissions:", error);
throw error;
}
};
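A brief sketch of how these four helpers could be called together from a settings screen (not part of the diff); the setting keys and values passed to `updateDefaultTeamSettings` are illustrative assumptions, and a valid proxy access token is assumed:

```typescript
import {
  getDefaultTeamSettings,
  updateDefaultTeamSettings,
  getTeamPermissionsCall,
  teamPermissionsUpdateCall,
} from "@/components/networking";

async function exampleTeamAdminFlow(accessToken: string, teamId: string) {
  // GET /get/default_team_settings -> { values, schema }
  const defaults = await getDefaultTeamSettings(accessToken);
  console.log("current defaults:", defaults.values);

  // PATCH /update/default_team_settings with the edited key/value pairs (keys are examples)
  await updateDefaultTeamSettings(accessToken, { budget_duration: "30d" });

  // GET /team/permissions_list?team_id=...
  const perms = await getTeamPermissionsCall(accessToken, teamId);

  // POST /team/permissions_update with the routes members should keep
  await teamPermissionsUpdateCall(accessToken, teamId, perms.team_member_permissions ?? []);
}
```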

View file

@ -0,0 +1,175 @@
import React, { useState, useEffect } from "react";
import {
Card,
Title,
Text,
Button as TremorButton,
Table,
TableHead,
TableHeaderCell,
TableBody,
TableRow,
TableCell,
} from "@tremor/react";
import { Button, message, Checkbox, Empty } from "antd";
import { ReloadOutlined, SaveOutlined } from "@ant-design/icons";
import { getTeamPermissionsCall, teamPermissionsUpdateCall } from "@/components/networking";
import { getPermissionInfo } from "./permission_definitions";
interface MemberPermissionsProps {
teamId: string;
accessToken: string | null;
canEditTeam: boolean;
}
const MemberPermissions: React.FC<MemberPermissionsProps> = ({
teamId,
accessToken,
canEditTeam,
}) => {
const [permissions, setPermissions] = useState<string[]>([]);
const [selectedPermissions, setSelectedPermissions] = useState<string[]>([]);
const [loading, setLoading] = useState(true);
const [saving, setSaving] = useState(false);
const [hasChanges, setHasChanges] = useState(false);
const fetchPermissions = async () => {
try {
setLoading(true);
if (!accessToken) return;
const response = await getTeamPermissionsCall(accessToken, teamId);
const allPermissions = response.all_available_permissions || [];
setPermissions(allPermissions);
const teamPermissions = response.team_member_permissions || [];
setSelectedPermissions(teamPermissions);
setHasChanges(false);
} catch (error) {
message.error("Failed to load permissions");
console.error("Error fetching permissions:", error);
} finally {
setLoading(false);
}
};
useEffect(() => {
fetchPermissions();
}, [teamId, accessToken]);
const handlePermissionChange = (permission: string, checked: boolean) => {
const newSelectedPermissions = checked
? [...selectedPermissions, permission]
: selectedPermissions.filter((p) => p !== permission);
setSelectedPermissions(newSelectedPermissions);
setHasChanges(true);
};
const handleSave = async () => {
try {
if (!accessToken) return;
setSaving(true);
await teamPermissionsUpdateCall(accessToken, teamId, selectedPermissions);
message.success("Permissions updated successfully");
setHasChanges(false);
} catch (error) {
message.error("Failed to update permissions");
console.error("Error updating permissions:", error);
} finally {
setSaving(false);
}
};
const handleReset = () => {
fetchPermissions();
};
if (loading) {
return <div className="p-6 text-center">Loading permissions...</div>;
}
const hasPermissions = permissions.length > 0;
return (
<Card className="bg-white shadow-md rounded-md p-6">
<div className="flex flex-col sm:flex-row justify-between items-start sm:items-center border-b pb-4 mb-6">
<Title className="mb-2 sm:mb-0">Member Permissions</Title>
{canEditTeam && hasChanges && (
<div className="flex gap-3">
<Button icon={<ReloadOutlined />} onClick={handleReset}>
Reset
</Button>
<TremorButton
onClick={handleSave}
loading={saving}
className="flex items-center gap-2"
>
<SaveOutlined /> Save Changes
</TremorButton>
</div>
)}
</div>
<Text className="mb-6 text-gray-600">
Control what team members can do when they are not team admins.
</Text>
{hasPermissions ? (
<Table className="mt-4">
<TableHead>
<TableRow>
<TableHeaderCell>Method</TableHeaderCell>
<TableHeaderCell>Endpoint</TableHeaderCell>
<TableHeaderCell>Description</TableHeaderCell>
<TableHeaderCell className="text-right">Access</TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
{permissions.map((permission) => {
const permInfo = getPermissionInfo(permission);
return (
<TableRow
key={permission}
className="hover:bg-gray-50 transition-colors"
>
<TableCell>
<span
className={`px-2 py-1 rounded text-xs font-medium ${
permInfo.method === "GET"
? "bg-blue-100 text-blue-800"
: "bg-green-100 text-green-800"
}`}
>
{permInfo.method}
</span>
</TableCell>
<TableCell>
<span className="font-mono text-sm text-gray-800">
{permInfo.endpoint}
</span>
</TableCell>
<TableCell className="text-gray-700">
{permInfo.description}
</TableCell>
<TableCell className="text-right">
<Checkbox
checked={selectedPermissions.includes(permission)}
onChange={(e) =>
handlePermissionChange(permission, e.target.checked)
}
disabled={!canEditTeam}
/>
</TableCell>
</TableRow>
);
})}
</TableBody>
</Table>
) : (
<div className="py-12">
<Empty description="No permissions available" />
</div>
)}
</Card>
);
};
export default MemberPermissions;
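For reference, the response shape `fetchPermissions` expects from `/team/permissions_list` (not part of the diff); the routes listed below are taken from the permission descriptions later in this changeset:

```typescript
// all_available_permissions: every route the proxy can delegate;
// team_member_permissions: the subset currently granted to non-admin members.
const examplePermissionsListResponse = {
  all_available_permissions: [
    "/key/generate",
    "/key/update",
    "/key/delete",
    "/key/info",
    "/key/list",
  ],
  team_member_permissions: ["/key/generate", "/key/info"],
};
```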

View file

@ -0,0 +1,64 @@
export interface PermissionInfo {
method: string;
endpoint: string;
description: string;
route: string;
}
/**
* Map of permission endpoint patterns to their descriptions
*/
export const PERMISSION_DESCRIPTIONS: Record<string, string> = {
'/key/generate': 'Member can generate a virtual key for this team',
'/key/update': 'Member can update a virtual key belonging to this team',
'/key/delete': 'Member can delete a virtual key belonging to this team',
'/key/info': 'Member can get info about a virtual key belonging to this team',
'/key/regenerate': 'Member can regenerate a virtual key belonging to this team',
'/key/{key_id}/regenerate': 'Member can regenerate a virtual key belonging to this team',
'/key/list': 'Member can list virtual keys belonging to this team',
'/key/block': 'Member can block a virtual key belonging to this team',
'/key/unblock': 'Member can unblock a virtual key belonging to this team'
};
/**
* Determines the HTTP method for a given permission endpoint
*/
export const getMethodForEndpoint = (endpoint: string): string => {
if (endpoint.includes('/info') || endpoint.includes('/list')) {
return 'GET';
}
return 'POST';
};
/**
* Parses a permission string into a structured PermissionInfo object
*/
export const getPermissionInfo = (permission: string): PermissionInfo => {
const method = getMethodForEndpoint(permission);
const endpoint = permission;
// Find exact match or fallback to default description
let description = PERMISSION_DESCRIPTIONS[permission];
// If no exact match, try to find a partial match based on patterns
if (!description) {
for (const [pattern, desc] of Object.entries(PERMISSION_DESCRIPTIONS)) {
if (permission.includes(pattern)) {
description = desc;
break;
}
}
}
// Fallback if no match found
if (!description) {
description = `Access ${permission}`;
}
return {
method,
endpoint,
description,
route: permission
};
};
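A quick worked example of the parsing rules above (not part of the diff): exact matches use the description map, `/info` and `/list` routes resolve to GET, everything else to POST, and unknown routes fall back to a generic description:

```typescript
import { getPermissionInfo } from "./permission_definitions";

// Exact match in PERMISSION_DESCRIPTIONS, POST by default:
getPermissionInfo("/key/generate");
// -> { method: "POST", endpoint: "/key/generate",
//      description: "Member can generate a virtual key for this team",
//      route: "/key/generate" }

// "/list" in the route -> GET:
getPermissionInfo("/key/list");
// -> { method: "GET", endpoint: "/key/list",
//      description: "Member can list virtual keys belonging to this team",
//      route: "/key/list" }

// Unknown route -> generic fallback description:
getPermissionInfo("/team/info");
// -> { method: "GET", endpoint: "/team/info",
//      description: "Access /team/info", route: "/team/info" }
```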

View file

@ -20,6 +20,8 @@ import {
Table,
Icon
} from "@tremor/react";
import TeamMembersComponent from "./team_member_view";
import MemberPermissions from "./member_permissions";
import { teamInfoCall, teamMemberDeleteCall, teamMemberAddCall, teamMemberUpdateCall, Member, teamUpdateCall } from "@/components/networking";
import { Button, Form, Input, Select, message, Tooltip } from "antd";
import { InfoCircleOutlined } from '@ant-design/icons';
@ -30,10 +32,9 @@ import { PencilAltIcon, PlusIcon, TrashIcon } from "@heroicons/react/outline";
import MemberModal from "./edit_membership";
import UserSearchModal from "@/components/common_components/user_search_modal";
import { getModelDisplayName } from "../key_team_helpers/fetch_available_models_team_key";
import { Team } from "../key_team_helpers/key_list";
interface TeamData {
export interface TeamData {
team_id: string;
team_info: {
team_alias: string;
@ -62,15 +63,15 @@ interface TeamData {
team_memberships: any[];
}
interface TeamInfoProps {
export interface TeamInfoProps {
teamId: string;
onUpdate: (data: any) => void;
onClose: () => void;
accessToken: string | null;
is_team_admin: boolean;
is_proxy_admin: boolean;
userModels: string[];
editTeam: boolean;
onUpdate?: (team: Team) => void
}
const TeamInfoView: React.FC<TeamInfoProps> = ({
@ -80,8 +81,7 @@ const TeamInfoView: React.FC<TeamInfoProps> = ({
is_team_admin,
is_proxy_admin,
userModels,
editTeam,
onUpdate
editTeam
}) => {
const [teamData, setTeamData] = useState<TeamData | null>(null);
const [loading, setLoading] = useState(true);
@ -202,10 +202,7 @@ const TeamInfoView: React.FC<TeamInfoProps> = ({
};
const response = await teamUpdateCall(accessToken, updateData);
if (onUpdate) {
onUpdate(response.data)
}
message.success("Team settings updated successfully");
setIsEditing(false);
fetchTeamInfo();
@ -241,6 +238,7 @@ const TeamInfoView: React.FC<TeamInfoProps> = ({
<Tab key="overview">Overview</Tab>,
...(canEditTeam ? [
<Tab key="members">Members</Tab>,
<Tab key="member-permissions">Member Permissions</Tab>,
<Tab key="settings">Settings</Tab>
] : [])
]}
@ -287,58 +285,23 @@ const TeamInfoView: React.FC<TeamInfoProps> = ({
{/* Members Panel */}
<TabPanel>
<div className="space-y-4">
<Card className="w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]">
<Table>
<TableHead>
<TableRow>
<TableHeaderCell>User ID</TableHeaderCell>
<TableHeaderCell>User Email</TableHeaderCell>
<TableHeaderCell>Role</TableHeaderCell>
<TableHeaderCell></TableHeaderCell>
</TableRow>
</TableHead>
<TeamMembersComponent
teamData={teamData}
canEditTeam={canEditTeam}
handleMemberDelete={handleMemberDelete}
setSelectedEditMember={setSelectedEditMember}
setIsEditMemberModalVisible={setIsEditMemberModalVisible}
setIsAddMemberModalVisible={setIsAddMemberModalVisible}
/>
</TabPanel>
<TableBody>
{teamData.team_info.members_with_roles.map((member: Member, index: number) => (
<TableRow key={index}>
<TableCell>
<Text className="font-mono">{member.user_id}</Text>
</TableCell>
<TableCell>
<Text className="font-mono">{member.user_email}</Text>
</TableCell>
<TableCell>
<Text className="font-mono">{member.role}</Text>
</TableCell>
<TableCell>
{canEditTeam && (
<>
<Icon
icon={PencilAltIcon}
size="sm"
onClick={() => {
setSelectedEditMember(member);
setIsEditMemberModalVisible(true);
}}
/>
<Icon
onClick={() => handleMemberDelete(member)}
icon={TrashIcon}
size="sm"
/>
</>
)}
</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</Card>
<TremorButton onClick={() => setIsAddMemberModalVisible(true)}>
Add Member
</TremorButton>
</div>
{/* Member Permissions Panel */}
<TabPanel>
<MemberPermissions
teamId={teamId}
accessToken={accessToken}
canEditTeam={canEditTeam}
/>
</TabPanel>
{/* Settings Panel */}

View file

@ -0,0 +1,93 @@
import React from 'react';
import { Member } from "@/components/networking";
import {
Card,
Table,
TableHead,
TableRow,
TableHeaderCell,
TableBody,
TableCell,
Text,
Icon,
Button as TremorButton,
} from '@tremor/react';
import {
TeamData,
} from './team_info';
import { PencilAltIcon, PlusIcon, TrashIcon } from "@heroicons/react/outline";
interface TeamMembersComponentProps {
teamData: TeamData;
canEditTeam: boolean;
handleMemberDelete: (member: Member) => void;
setSelectedEditMember: (member: Member) => void;
setIsEditMemberModalVisible: (visible: boolean) => void;
setIsAddMemberModalVisible: (visible: boolean) => void;
}
const TeamMembersComponent: React.FC<TeamMembersComponentProps> = ({
teamData,
canEditTeam,
handleMemberDelete,
setSelectedEditMember,
setIsEditMemberModalVisible,
setIsAddMemberModalVisible,
}) => {
return (
<div className="space-y-4">
<Card className="w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]">
<Table>
<TableHead>
<TableRow>
<TableHeaderCell>User ID</TableHeaderCell>
<TableHeaderCell>User Email</TableHeaderCell>
<TableHeaderCell>Role</TableHeaderCell>
<TableHeaderCell></TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
{teamData.team_info.members_with_roles.map((member: Member, index: number) => (
<TableRow key={index}>
<TableCell>
<Text className="font-mono">{member.user_id}</Text>
</TableCell>
<TableCell>
<Text className="font-mono">{member.user_email}</Text>
</TableCell>
<TableCell>
<Text className="font-mono">{member.role}</Text>
</TableCell>
<TableCell>
{canEditTeam && (
<>
<Icon
icon={PencilAltIcon}
size="sm"
onClick={() => {
setSelectedEditMember(member);
setIsEditMemberModalVisible(true);
}}
/>
<Icon
icon={TrashIcon}
size="sm"
onClick={() => handleMemberDelete(member)}
/>
</>
)}
</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</Card>
<TremorButton onClick={() => setIsAddMemberModalVisible(true)}>
Add Member
</TremorButton>
</div>
);
};
export default TeamMembersComponent;

View file

@ -27,6 +27,8 @@ import { Select, SelectItem } from "@tremor/react";
import { InfoCircleOutlined } from '@ant-design/icons';
import { getGuardrailsList } from "./networking";
import TeamInfoView from "@/components/team/team_info";
import TeamSSOSettings from "@/components/TeamSSOSettings";
import { isAdminRole } from "@/utils/roles";
import {
Table,
TableBody,
@ -322,7 +324,7 @@ const Teams: React.FC<TeamProps> = ({
{selectedTeamId ? (
<TeamInfoView
teamId={selectedTeamId}
onUpdate={data => {
onUpdate={(data) => {
setTeams(teams => {
if (teams == null) {
return teams;
@ -354,6 +356,7 @@ const Teams: React.FC<TeamProps> = ({
<div className="flex">
<Tab>Your Teams</Tab>
<Tab>Available Teams</Tab>
{isAdminRole(userRole || "") && <Tab>Default Team Settings</Tab>}
</div>
<div className="flex items-center space-x-2">
{lastRefreshed && <Text>Last Refreshed: {lastRefreshed}</Text>}
@ -797,6 +800,15 @@ const Teams: React.FC<TeamProps> = ({
userID={userID}
/>
</TabPanel>
{isAdminRole(userRole || "") && (
<TabPanel>
<TeamSSOSettings
accessToken={accessToken}
userID={userID || ""}
userRole={userRole || ""}
/>
</TabPanel>
)}
</TabPanels>
</TabGroup>)}

View file

@ -5,4 +5,10 @@ export const all_admin_roles = [...old_admin_roles, ...v2_admin_role_names];
export const internalUserRoles = ["Internal User", "Internal Viewer"];
export const rolesAllowedToSeeUsage = ["Admin", "Admin Viewer", "Internal User", "Internal Viewer"];
export const rolesWithWriteAccess = ["Internal User", "Admin"];
// Helper function to check if a role is in all_admin_roles
export const isAdminRole = (role: string): boolean => {
return all_admin_roles.includes(role);
};