diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 8b88643f1a..e345815fb2 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -4847,6 +4847,33 @@
         "supports_tool_choice": true,
         "source": "https://ai.google.dev/pricing#2_0flash"
     },
+    "gemini/gemini-2.5-pro-preview-03-25": {
+        "max_tokens": 65536,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 65536,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 0.0000007,
+        "input_cost_per_token": 0.00000125,
+        "input_cost_per_token_above_128k_tokens": 0.0000025,
+        "output_cost_per_token": 0.0000010,
+        "output_cost_per_token_above_128k_tokens": 0.000015,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "rpm": 10000,
+        "tpm": 10000000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview"
+    },
     "gemini/gemini-2.0-flash-exp": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
@@ -6665,6 +6692,14 @@
         "mode": "chat",
         "supports_tool_choice": true
     },
+    "mistralai/mistral-small-3.1-24b-instruct": {
+        "max_tokens": 32000,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000003,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_tool_choice": true
+    },
     "openrouter/cognitivecomputations/dolphin-mixtral-8x7b": {
         "max_tokens": 32769,
         "input_cost_per_token": 0.0000005,
@@ -6793,12 +6828,38 @@
         "supports_vision": false,
         "supports_tool_choice": true
     },
+    "openrouter/openai/o3-mini": {
+        "max_tokens": 65536,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 0.0000011,
+        "output_cost_per_token": 0.0000044,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": false,
+        "supports_tool_choice": true
+    },
+    "openrouter/openai/o3-mini-high": {
+        "max_tokens": 65536,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 0.0000011,
+        "output_cost_per_token": 0.0000044,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": false,
+        "supports_tool_choice": true
+    },
     "openrouter/openai/gpt-4o": {
         "max_tokens": 4096,
         "max_input_tokens": 128000,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.000005,
-        "output_cost_per_token": 0.000015,
+        "input_cost_per_token": 0.0000025,
+        "output_cost_per_token": 0.000010,
         "litellm_provider": "openrouter",
         "mode": "chat",
         "supports_function_calling": true,
diff --git a/litellm/proxy/management_endpoints/model_management_endpoints.py b/litellm/proxy/management_endpoints/model_management_endpoints.py
index 3d93c9f4b3..42dd903e79 100644
--- a/litellm/proxy/management_endpoints/model_management_endpoints.py
+++ b/litellm/proxy/management_endpoints/model_management_endpoints.py
@@ -13,7 +13,7 @@ model/{model_id}/update - PATCH endpoint for model update.
 import asyncio
 import json
 import uuid
-from typing import Literal, Optional, Union, cast
+from typing import Dict, List, Literal, Optional, Union, cast
 
 from fastapi import APIRouter, Depends, HTTPException, Request, status
 from pydantic import BaseModel
@@ -846,3 +846,24 @@ async def update_model(
             param=getattr(e, "param", "None"),
             code=status.HTTP_400_BAD_REQUEST,
         )
+
+
+def _deduplicate_litellm_router_models(models: List[Dict]) -> List[Dict]:
+    """
+    Deduplicate models based on their model_info.id field.
+    Returns a list of unique models keeping only the first occurrence of each model ID.
+
+    Args:
+        models: List of model dictionaries containing model_info
+
+    Returns:
+        List of deduplicated model dictionaries
+    """
+    seen_ids = set()
+    unique_models = []
+    for model in models:
+        model_id = model.get("model_info", {}).get("id", None)
+        if model_id is not None and model_id not in seen_ids:
+            unique_models.append(model)
+            seen_ids.add(model_id)
+    return unique_models
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index e1982a3ca0..dc145aebc1 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -229,6 +229,7 @@ from litellm.proxy.management_endpoints.key_management_endpoints import (
 from litellm.proxy.management_endpoints.model_management_endpoints import (
     _add_model_to_db,
     _add_team_model_to_db,
+    _deduplicate_litellm_router_models,
 )
 from litellm.proxy.management_endpoints.model_management_endpoints import (
     router as model_management_router,
@@ -5371,6 +5372,7 @@ async def non_admin_all_models(
             detail={"error": CommonProxyErrors.db_not_connected_error.value},
         )
 
+    # Get all models that are user-added, when model created_by == user_api_key_dict.user_id
     all_models = await _check_if_model_is_user_added(
         models=all_models,
         user_api_key_dict=user_api_key_dict,
@@ -5385,12 +5387,15 @@ async def non_admin_all_models(
     except Exception:
         raise HTTPException(status_code=400, detail={"error": "User not found"})
 
+    # Get all models that are team models, when model team_id == user_row.teams
     all_models += _check_if_model_is_team_model(
         models=llm_router.get_model_list() or [],
         user_row=user_row,
     )
 
-    return all_models
+    # de-duplicate models. Only return unique model ids
+    unique_models = _deduplicate_litellm_router_models(models=all_models)
+    return unique_models
 
 
 @router.get(
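
Note (not part of the patch): a minimal usage sketch of the new _deduplicate_litellm_router_models helper added above, assuming the deployment dicts have the same shape as llm_router.get_model_list() entries. Models sharing a model_info.id collapse to the first occurrence, and entries without an id are dropped.

    # Hypothetical example data; the helper itself is defined in
    # litellm/proxy/management_endpoints/model_management_endpoints.py above.
    models = [
        {"model_name": "gpt-4o", "model_info": {"id": "abc-1"}},
        {"model_name": "gpt-4o", "model_info": {"id": "abc-1"}},  # duplicate id, kept once
        {"model_name": "o3-mini", "model_info": {"id": "def-2"}},
        {"model_name": "no-id-model", "model_info": {}},  # no id, filtered out
    ]
    unique = _deduplicate_litellm_router_models(models=models)
    assert [m["model_info"]["id"] for m in unique] == ["abc-1", "def-2"]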