bug fix: de-duplicate model list (#9775)

Ishaan Jaff 2025-04-05 12:29:11 -07:00 committed by GitHub
parent 34bdf36eab
commit 3a7061a05c
3 changed files with 91 additions and 4 deletions

@@ -4847,6 +4847,33 @@
         "supports_tool_choice": true,
         "source": "https://ai.google.dev/pricing#2_0flash"
     },
+    "gemini/gemini-2.5-pro-preview-03-25": {
+        "max_tokens": 65536,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 65536,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 0.0000007,
+        "input_cost_per_token": 0.00000125,
+        "input_cost_per_token_above_128k_tokens": 0.0000025,
+        "output_cost_per_token": 0.0000010,
+        "output_cost_per_token_above_128k_tokens": 0.000015,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "rpm": 10000,
+        "tpm": 10000000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview"
+    },
     "gemini/gemini-2.0-flash-exp": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
@@ -6665,6 +6692,14 @@
         "mode": "chat",
         "supports_tool_choice": true
     },
+    "mistralai/mistral-small-3.1-24b-instruct": {
+        "max_tokens": 32000,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000003,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_tool_choice": true
+    },
     "openrouter/cognitivecomputations/dolphin-mixtral-8x7b": {
         "max_tokens": 32769,
         "input_cost_per_token": 0.0000005,
@@ -6793,12 +6828,38 @@
         "supports_vision": false,
         "supports_tool_choice": true
     },
+    "openrouter/openai/o3-mini": {
+        "max_tokens": 65536,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 0.0000011,
+        "output_cost_per_token": 0.0000044,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": false,
+        "supports_tool_choice": true
+    },
+    "openrouter/openai/o3-mini-high": {
+        "max_tokens": 65536,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 0.0000011,
+        "output_cost_per_token": 0.0000044,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": false,
+        "supports_tool_choice": true
+    },
     "openrouter/openai/gpt-4o": {
         "max_tokens": 4096,
         "max_input_tokens": 128000,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.000005,
-        "output_cost_per_token": 0.000015,
+        "input_cost_per_token": 0.0000025,
+        "output_cost_per_token": 0.000010,
         "litellm_provider": "openrouter",
         "mode": "chat",
         "supports_function_calling": true,

@@ -13,7 +13,7 @@ model/{model_id}/update - PATCH endpoint for model update.
 import asyncio
 import json
 import uuid
-from typing import Literal, Optional, Union, cast
+from typing import Dict, List, Literal, Optional, Union, cast
 
 from fastapi import APIRouter, Depends, HTTPException, Request, status
 from pydantic import BaseModel
@@ -846,3 +846,24 @@ async def update_model(
             param=getattr(e, "param", "None"),
             code=status.HTTP_400_BAD_REQUEST,
         )
+
+
+def _deduplicate_litellm_router_models(models: List[Dict]) -> List[Dict]:
+    """
+    Deduplicate models based on their model_info.id field.
+    Returns a list of unique models keeping only the first occurrence of each model ID.
+
+    Args:
+        models: List of model dictionaries containing model_info
+
+    Returns:
+        List of deduplicated model dictionaries
+    """
+    seen_ids = set()
+    unique_models = []
+    for model in models:
+        model_id = model.get("model_info", {}).get("id", None)
+        if model_id is not None and model_id not in seen_ids:
+            unique_models.append(model)
+            seen_ids.add(model_id)
+    return unique_models
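
A quick usage sketch for the new helper (hypothetical data; note that entries whose model_info carries no id are dropped, not kept):

```python
# Hypothetical input: two router entries share a model_info.id.
models = [
    {"model_name": "gpt-4o", "model_info": {"id": "abc123"}},
    {"model_name": "gpt-4o", "model_info": {"id": "abc123"}},  # duplicate id
    {"model_name": "claude-3", "model_info": {"id": "def456"}},
    {"model_name": "mystery", "model_info": {}},  # no id -> silently dropped
]

assert _deduplicate_litellm_router_models(models) == [
    {"model_name": "gpt-4o", "model_info": {"id": "abc123"}},
    {"model_name": "claude-3", "model_info": {"id": "def456"}},
]
```

First occurrence wins, so the ordering of the input list determines which duplicate survives.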

@@ -229,6 +229,7 @@ from litellm.proxy.management_endpoints.key_management_endpoints import (
 from litellm.proxy.management_endpoints.model_management_endpoints import (
     _add_model_to_db,
     _add_team_model_to_db,
+    _deduplicate_litellm_router_models,
 )
 from litellm.proxy.management_endpoints.model_management_endpoints import (
     router as model_management_router,
@@ -5371,6 +5372,7 @@ async def non_admin_all_models(
             detail={"error": CommonProxyErrors.db_not_connected_error.value},
         )
 
+    # Get all models that are user-added, when model created_by == user_api_key_dict.user_id
     all_models = await _check_if_model_is_user_added(
         models=all_models,
         user_api_key_dict=user_api_key_dict,
@@ -5385,12 +5387,15 @@
     except Exception:
         raise HTTPException(status_code=400, detail={"error": "User not found"})
 
+    # Get all models that are team models, when model team_id == user_row.teams
     all_models += _check_if_model_is_team_model(
         models=llm_router.get_model_list() or [],
         user_row=user_row,
     )
 
-    return all_models
+    # de-duplicate models. Only return unique model ids
+    unique_models = _deduplicate_litellm_router_models(models=all_models)
+    return unique_models
 
 
 @router.get(
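
Putting the pieces together: non_admin_all_models concatenates user-added models and team models, so a deployment visible through both paths used to appear twice in the response. A condensed sketch of the failure mode and the fix (simplified data, not the actual proxy objects):

```python
# Simplified reproduction of the bug this commit fixes.
user_added = [{"model_name": "gpt-4o", "model_info": {"id": "abc123"}}]
team_models = [{"model_name": "gpt-4o", "model_info": {"id": "abc123"}}]

all_models = user_added + team_models  # same deployment, listed twice
unique_models = _deduplicate_litellm_router_models(models=all_models)
assert len(unique_models) == 1  # de-duplicated before being returned
```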