The request you would send to the LiteLLM /chat/completions endpoint.
How LiteLLM transforms your request for the specified provider.
Note: Sensitive headers are not shown.
{transformedRequest}
Results from the SSO authentication process.
The SSO authentication completed successfully. Below is the information returned by the provider.
Loading...
No data was returned from the SSO provider.
", + status_code=400, + ) + + # Convert the OpenID object to a dictionary + if hasattr(result, "__dict__"): + result_dict = result.__dict__ + else: + result_dict = dict(result) + + # Filter out any None values and convert to JSON serializable format + filtered_result = {} + for key, value in result_dict.items(): + if value is not None and not key.startswith("_"): + if isinstance(value, (str, int, float, bool)) or value is None: + filtered_result[key] = value + else: + try: + # Try to convert to string or another JSON serializable format + filtered_result[key] = str(value) + except Exception as e: + filtered_result[key] = f"Complex value (not displayable): {str(e)}" + + # Replace the placeholder in the template with the actual data + html_content = jwt_display_template.replace( + "const userData = SSO_DATA;", + f"const userData = {json.dumps(filtered_result, indent=2)};", + ) + + return HTMLResponse(content=html_content) diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py index a6b1b3e614..563d0cb543 100644 --- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py @@ -4,16 +4,26 @@ import json import uuid from base64 import b64encode from datetime import datetime -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional, Tuple, Union from urllib.parse import parse_qs, urlencode, urlparse import httpx -from fastapi import APIRouter, Depends, HTTPException, Request, Response, status +from fastapi import ( + APIRouter, + Depends, + HTTPException, + Request, + Response, + UploadFile, + status, +) from fastapi.responses import StreamingResponse +from starlette.datastructures import UploadFile as StarletteUploadFile import litellm from litellm._logging import verbose_proxy_logger from litellm.integrations.custom_logger import CustomLogger +from litellm.litellm_core_utils.safe_json_dumps import safe_dumps from litellm.llms.custom_httpx.http_handler import get_async_httpx_client from litellm.proxy._types import ( ConfigFieldInfo, @@ -358,6 +368,92 @@ class HttpPassThroughEndpointHelpers: ) return response + @staticmethod + async def non_streaming_http_request_handler( + request: Request, + async_client: httpx.AsyncClient, + url: httpx.URL, + headers: dict, + requested_query_params: Optional[dict] = None, + _parsed_body: Optional[dict] = None, + ) -> httpx.Response: + """ + Handle non-streaming HTTP requests + + Handles special cases when GET requests, multipart/form-data requests, and generic httpx requests + """ + if request.method == "GET": + response = await async_client.request( + method=request.method, + url=url, + headers=headers, + params=requested_query_params, + ) + elif HttpPassThroughEndpointHelpers.is_multipart(request) is True: + return await HttpPassThroughEndpointHelpers.make_multipart_http_request( + request=request, + async_client=async_client, + url=url, + headers=headers, + requested_query_params=requested_query_params, + ) + else: + # Generic httpx method + response = await async_client.request( + method=request.method, + url=url, + headers=headers, + params=requested_query_params, + json=_parsed_body, + ) + return response + + @staticmethod + def is_multipart(request: Request) -> bool: + """Check if the request is a multipart/form-data request""" + return "multipart/form-data" in request.headers.get("content-type", "") + + @staticmethod + async def 
_build_request_files_from_upload_file( + upload_file: Union[UploadFile, StarletteUploadFile], + ) -> Tuple[Optional[str], bytes, Optional[str]]: + """Build a request files dict from an UploadFile object""" + file_content = await upload_file.read() + return (upload_file.filename, file_content, upload_file.content_type) + + @staticmethod + async def make_multipart_http_request( + request: Request, + async_client: httpx.AsyncClient, + url: httpx.URL, + headers: dict, + requested_query_params: Optional[dict] = None, + ) -> httpx.Response: + """Process multipart/form-data requests, handling both files and form fields""" + form_data = await request.form() + files = {} + form_data_dict = {} + + for field_name, field_value in form_data.items(): + if isinstance(field_value, (StarletteUploadFile, UploadFile)): + files[field_name] = ( + await HttpPassThroughEndpointHelpers._build_request_files_from_upload_file( + upload_file=field_value + ) + ) + else: + form_data_dict[field_name] = field_value + + response = await async_client.request( + method=request.method, + url=url, + headers=headers, + params=requested_query_params, + files=files, + data=form_data_dict, + ) + return response + async def pass_through_request( # noqa: PLR0915 request: Request, @@ -424,7 +520,7 @@ async def pass_through_request( # noqa: PLR0915 start_time = datetime.now() logging_obj = Logging( model="unknown", - messages=[{"role": "user", "content": json.dumps(_parsed_body)}], + messages=[{"role": "user", "content": safe_dumps(_parsed_body)}], stream=False, call_type="pass_through_endpoint", start_time=start_time, @@ -453,7 +549,6 @@ async def pass_through_request( # noqa: PLR0915 logging_obj.model_call_details["litellm_call_id"] = litellm_call_id # combine url with query params for logging - requested_query_params: Optional[dict] = ( query_params or request.query_params.__dict__ ) @@ -474,7 +569,7 @@ async def pass_through_request( # noqa: PLR0915 logging_url = str(url) + "?" 
+ requested_query_params_str logging_obj.pre_call( - input=[{"role": "user", "content": json.dumps(_parsed_body)}], + input=[{"role": "user", "content": safe_dumps(_parsed_body)}], api_key="", additional_args={ "complete_input_dict": _parsed_body, @@ -525,22 +620,16 @@ async def pass_through_request( # noqa: PLR0915 ) verbose_proxy_logger.debug("request body: {}".format(_parsed_body)) - if request.method == "GET": - response = await async_client.request( - method=request.method, + response = ( + await HttpPassThroughEndpointHelpers.non_streaming_http_request_handler( + request=request, + async_client=async_client, url=url, headers=headers, - params=requested_query_params, + requested_query_params=requested_query_params, + _parsed_body=_parsed_body, ) - else: - response = await async_client.request( - method=request.method, - url=url, - headers=headers, - params=requested_query_params, - json=_parsed_body, - ) - + ) verbose_proxy_logger.debug("response.headers= %s", response.headers) if _is_streaming_response(response) is True: diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 709cf08729..23de923db7 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -10,11 +10,12 @@ model_list: api_key: fake-key litellm_settings: - require_auth_for_metrics_endpoint: true - - - callbacks: ["prometheus"] - service_callback: ["prometheus_system"] - -router_settings: - enable_tag_filtering: True # 👈 Key Change \ No newline at end of file + default_team_settings: + - team_id: test_dev + success_callback: ["langfuse", "s3"] + langfuse_secret: secret-test-key + langfuse_public_key: public-test-key + - team_id: my_workflows + success_callback: ["langfuse", "s3"] + langfuse_secret: secret-workflows-key + langfuse_public_key: public-workflows-key diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index c270d41cf0..ddfb7118d7 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -139,6 +139,7 @@ from litellm.litellm_core_utils.core_helpers import ( ) from litellm.litellm_core_utils.credential_accessor import CredentialAccessor from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj +from litellm.litellm_core_utils.sensitive_data_masker import SensitiveDataMasker from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.proxy._experimental.mcp_server.server import router as mcp_router from litellm.proxy._experimental.mcp_server.tool_registry import ( @@ -387,6 +388,7 @@ global_max_parallel_request_retries_env: Optional[str] = os.getenv( "LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRIES" ) proxy_state = ProxyState() +SENSITIVE_DATA_MASKER = SensitiveDataMasker() if global_max_parallel_request_retries_env is None: global_max_parallel_request_retries: int = 3 else: @@ -1397,7 +1399,9 @@ class ProxyConfig: team_config: dict = {} for team in all_teams_config: if "team_id" not in team: - raise Exception(f"team_id missing from team: {team}") + raise Exception( + f"team_id missing from team: {SENSITIVE_DATA_MASKER.mask_dict(team)}" + ) if team_id == team["team_id"]: team_config = team break diff --git a/litellm/proxy/types_utils/utils.py b/litellm/proxy/types_utils/utils.py index 788849b3d5..f3dbfda6b2 100644 --- a/litellm/proxy/types_utils/utils.py +++ b/litellm/proxy/types_utils/utils.py @@ -14,6 +14,9 @@ def get_instance_fn(value: str, config_file_path: Optional[str] = None) -> Any: module_name = ".".join(parts[:-1]) instance_name = parts[-1] + # 
Security: Check if the module name contains any dangerous modules that can execute arbitrary code + security_checks(module_name=module_name) + # If config_file_path is provided, use it to determine the module spec and load the module if config_file_path is not None: directory = os.path.dirname(config_file_path) @@ -47,6 +50,35 @@ def get_instance_fn(value: str, config_file_path: Optional[str] = None) -> Any: raise e +def security_checks( + module_name: str, +): + """ + This function checks if the module name contains any dangerous modules that can execute arbitrary code. + + Reference: https://huntr.com/bounties/1d98bebb-6cf4-46c9-87c3-d3b1972973b5 + """ + DANGEROUS_MODULES = [ + "os", + "sys", + "subprocess", + "shutil", + "socket", + "multiprocessing", + "threading", + "ctypes", + "pickle", + "marshal", + "builtins", + "__builtin__", + ] + # Security: Check if the module name contains any dangerous modules + if any(dangerous in module_name.lower() for dangerous in DANGEROUS_MODULES): + raise ImportError( + f"Importing from module {module_name} is not allowed for security reasons" + ) + + def validate_custom_validate_return_type( fn: Optional[Callable[..., Any]] ) -> Optional[Callable[..., Literal[True]]]: diff --git a/litellm/types/llms/custom_http.py b/litellm/types/llms/custom_http.py index 5eec187dd4..8759dedec6 100644 --- a/litellm/types/llms/custom_http.py +++ b/litellm/types/llms/custom_http.py @@ -19,6 +19,7 @@ class httpxSpecialProvider(str, Enum): SecretManager = "secret_manager" PassThroughEndpoint = "pass_through_endpoint" PromptFactory = "prompt_factory" + SSO_HANDLER = "sso_handler" VerifyTypes = Union[str, bool, ssl.SSLContext] diff --git a/litellm/types/llms/vertex_ai.py b/litellm/types/llms/vertex_ai.py index 7fa167938f..55273371fc 100644 --- a/litellm/types/llms/vertex_ai.py +++ b/litellm/types/llms/vertex_ai.py @@ -187,6 +187,7 @@ class Tools(TypedDict, total=False): function_declarations: List[FunctionDeclaration] googleSearch: dict googleSearchRetrieval: dict + enterpriseWebSearch: dict code_execution: dict retrieval: Retrieval @@ -497,6 +498,51 @@ class OutputConfig(TypedDict, total=False): gcsDestination: GcsDestination +class GcsBucketResponse(TypedDict): + """ + TypedDict for GCS bucket upload response + + Attributes: + kind: The kind of item this is. 
For objects, this is always storage#object + id: The ID of the object + selfLink: The link to this object + mediaLink: The link to download the object + name: The name of the object + bucket: The name of the bucket containing this object + generation: The content generation of this object + metageneration: The metadata generation of this object + contentType: The content type of the object + storageClass: The storage class of the object + size: The size of the object in bytes + md5Hash: The MD5 hash of the object + crc32c: The CRC32c checksum of the object + etag: The ETag of the object + timeCreated: The creation time of the object + updated: The last update time of the object + timeStorageClassUpdated: The time the storage class was last updated + timeFinalized: The time the object was finalized + """ + + kind: Literal["storage#object"] + id: str + selfLink: str + mediaLink: str + name: str + bucket: str + generation: str + metageneration: str + contentType: str + storageClass: str + size: str + md5Hash: str + crc32c: str + etag: str + timeCreated: str + updated: str + timeStorageClassUpdated: str + timeFinalized: str + + class VertexAIBatchPredictionJob(TypedDict): displayName: str model: str diff --git a/litellm/types/proxy/management_endpoints/ui_sso.py b/litellm/types/proxy/management_endpoints/ui_sso.py new file mode 100644 index 0000000000..ca17c47006 --- /dev/null +++ b/litellm/types/proxy/management_endpoints/ui_sso.py @@ -0,0 +1,27 @@ +from typing import List, Optional, TypedDict + + +class MicrosoftGraphAPIUserGroupDirectoryObject(TypedDict, total=False): + """Model for Microsoft Graph API directory object""" + + odata_type: Optional[str] + id: Optional[str] + deletedDateTime: Optional[str] + description: Optional[str] + displayName: Optional[str] + roleTemplateId: Optional[str] + + +class MicrosoftGraphAPIUserGroupResponse(TypedDict, total=False): + """Model for Microsoft Graph API user groups response""" + + odata_context: Optional[str] + odata_nextLink: Optional[str] + value: Optional[List[MicrosoftGraphAPIUserGroupDirectoryObject]] + + +class MicrosoftServicePrincipalTeam(TypedDict, total=False): + """Model for Microsoft Service Principal Team""" + + principalDisplayName: Optional[str] + principalId: Optional[str] diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 8439037758..3af5ca157e 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -2,7 +2,7 @@ import json import time import uuid from enum import Enum -from typing import Any, Dict, List, Literal, Optional, Tuple, Union +from typing import Any, Dict, List, Literal, Mapping, Optional, Tuple, Union from aiohttp import FormData from openai._models import BaseModel as OpenAIObject @@ -120,6 +120,9 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False): input_cost_per_character: Optional[float] # only for vertex ai models input_cost_per_audio_token: Optional[float] input_cost_per_token_above_128k_tokens: Optional[float] # only for vertex ai models + input_cost_per_token_above_200k_tokens: Optional[ + float + ] # only for vertex ai gemini-2.5-pro models input_cost_per_character_above_128k_tokens: Optional[ float ] # only for vertex ai models @@ -136,6 +139,9 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False): output_cost_per_token_above_128k_tokens: Optional[ float ] # only for vertex ai models + output_cost_per_token_above_200k_tokens: Optional[ + float + ] # only for vertex ai gemini-2.5-pro models output_cost_per_character_above_128k_tokens: Optional[ float ] # only for 
vertex ai models @@ -2170,3 +2176,20 @@ class CreateCredentialItem(CredentialBase): if not values.get("credential_values") and not values.get("model_id"): raise ValueError("Either credential_values or model_id must be set") return values + + +class ExtractedFileData(TypedDict): + """ + TypedDict for storing processed file data + + Attributes: + filename: Name of the file if provided + content: The file content in bytes + content_type: MIME type of the file + headers: Any additional headers for the file + """ + + filename: Optional[str] + content: bytes + content_type: Optional[str] + headers: Mapping[str, str] diff --git a/litellm/utils.py b/litellm/utils.py index f807990f60..9ebe0f4b09 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4532,6 +4532,9 @@ def _get_model_info_helper( # noqa: PLR0915 input_cost_per_token_above_128k_tokens=_model_info.get( "input_cost_per_token_above_128k_tokens", None ), + input_cost_per_token_above_200k_tokens=_model_info.get( + "input_cost_per_token_above_200k_tokens", None + ), input_cost_per_query=_model_info.get("input_cost_per_query", None), input_cost_per_second=_model_info.get("input_cost_per_second", None), input_cost_per_audio_token=_model_info.get( @@ -4556,6 +4559,9 @@ def _get_model_info_helper( # noqa: PLR0915 output_cost_per_character_above_128k_tokens=_model_info.get( "output_cost_per_character_above_128k_tokens", None ), + output_cost_per_token_above_200k_tokens=_model_info.get( + "output_cost_per_token_above_200k_tokens", None + ), output_cost_per_second=_model_info.get("output_cost_per_second", None), output_cost_per_image=_model_info.get("output_cost_per_image", None), output_vector_size=_model_info.get("output_vector_size", None), @@ -6517,6 +6523,10 @@ class ProviderConfigManager: ) return GoogleAIStudioFilesHandler() + elif LlmProviders.VERTEX_AI == provider: + from litellm.llms.vertex_ai.files.transformation import VertexAIFilesConfig + + return VertexAIFilesConfig() return None diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 79aa57f466..c67c3b85af 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -380,6 +380,7 @@ "supports_tool_choice": true, "supports_native_streaming": false, "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supported_endpoints": ["/v1/responses", "/v1/batch"] }, "o1-pro-2025-03-19": { @@ -401,6 +402,7 @@ "supports_tool_choice": true, "supports_native_streaming": false, "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supported_endpoints": ["/v1/responses", "/v1/batch"] }, "o1": { @@ -2409,25 +2411,26 @@ "max_tokens": 4096, "max_input_tokens": 131072, "max_output_tokens": 4096, - "input_cost_per_token": 0, - "output_cost_per_token": 0, + "input_cost_per_token": 0.000000075, + "output_cost_per_token": 0.0000003, "litellm_provider": "azure_ai", "mode": "chat", "supports_function_calling": true, - "source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft" + "source": "https://techcommunity.microsoft.com/blog/Azure-AI-Services-blog/announcing-new-phi-pricing-empowering-your-business-with-small-language-models/4395112" }, "azure_ai/Phi-4-multimodal-instruct": { "max_tokens": 4096, "max_input_tokens": 131072, "max_output_tokens": 4096, - "input_cost_per_token": 0, - "output_cost_per_token": 0, + "input_cost_per_token": 0.00000008, + "input_cost_per_audio_token": 0.000004, + "output_cost_per_token": 0.00032, 
"litellm_provider": "azure_ai", "mode": "chat", "supports_audio_input": true, "supports_function_calling": true, "supports_vision": true, - "source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft" + "source": "https://techcommunity.microsoft.com/blog/Azure-AI-Services-blog/announcing-new-phi-pricing-empowering-your-business-with-small-language-models/4395112" }, "azure_ai/Phi-4": { "max_tokens": 16384, @@ -3467,7 +3470,7 @@ "input_cost_per_token": 0.0000008, "output_cost_per_token": 0.000004, "cache_creation_input_token_cost": 0.000001, - "cache_read_input_token_cost": 0.0000008, + "cache_read_input_token_cost": 0.00000008, "litellm_provider": "anthropic", "mode": "chat", "supports_function_calling": true, @@ -4511,20 +4514,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 0, - "input_cost_per_video_per_second": 0, - "input_cost_per_audio_per_second": 0, - "input_cost_per_token": 0, - "input_cost_per_character": 0, - "input_cost_per_token_above_128k_tokens": 0, - "input_cost_per_character_above_128k_tokens": 0, - "input_cost_per_image_above_128k_tokens": 0, - "input_cost_per_video_per_second_above_128k_tokens": 0, - "input_cost_per_audio_per_second_above_128k_tokens": 0, - "output_cost_per_token": 0, - "output_cost_per_character": 0, - "output_cost_per_token_above_128k_tokens": 0, - "output_cost_per_character_above_128k_tokens": 0, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -4535,6 +4528,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" }, "gemini-2.0-pro-exp-02-05": { @@ -4547,20 +4543,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 0, - "input_cost_per_video_per_second": 0, - "input_cost_per_audio_per_second": 0, - "input_cost_per_token": 0, - "input_cost_per_character": 0, - "input_cost_per_token_above_128k_tokens": 0, - "input_cost_per_character_above_128k_tokens": 0, - "input_cost_per_image_above_128k_tokens": 0, - "input_cost_per_video_per_second_above_128k_tokens": 0, - "input_cost_per_audio_per_second_above_128k_tokens": 0, - "output_cost_per_token": 0, - "output_cost_per_character": 0, - "output_cost_per_token_above_128k_tokens": 0, - "output_cost_per_character_above_128k_tokens": 0, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -4571,6 +4557,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" }, "gemini-2.0-flash-exp": { @@ -4604,6 +4593,8 @@ "supports_vision": true, 
"supports_response_schema": true, "supports_audio_output": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_tool_choice": true }, @@ -4628,6 +4619,8 @@ "supports_response_schema": true, "supports_audio_output": true, "supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" }, "gemini-2.0-flash-thinking-exp": { @@ -4661,6 +4654,8 @@ "supports_vision": true, "supports_response_schema": true, "supports_audio_output": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, @@ -4695,6 +4690,8 @@ "supports_vision": true, "supports_response_schema": false, "supports_audio_output": false, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, @@ -4720,6 +4717,7 @@ "supports_audio_output": true, "supports_audio_input": true, "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_tool_choice": true, "source": "https://ai.google.dev/pricing#2_0flash" }, @@ -4742,6 +4740,32 @@ "supports_vision": true, "supports_response_schema": true, "supports_audio_output": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", + "supports_tool_choice": true + }, + "gemini-2.0-flash-lite-001": { + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 50, + "input_cost_per_audio_token": 0.000000075, + "input_cost_per_token": 0.000000075, + "output_cost_per_token": 0.0000003, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_audio_output": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, @@ -4807,6 +4831,7 @@ "supports_audio_output": true, "supports_audio_input": true, "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_tool_choice": true, "source": "https://ai.google.dev/pricing#2_0flash" }, @@ -4832,6 +4857,8 @@ "supports_response_schema": true, "supports_audio_output": true, "supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite" }, "gemini/gemini-2.0-flash-001": { @@ -4857,6 +4884,8 @@ "supports_response_schema": true, "supports_audio_output": false, 
"supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://ai.google.dev/pricing#2_0flash" }, "gemini/gemini-2.5-pro-preview-03-25": { @@ -4871,9 +4900,9 @@ "max_pdf_size_mb": 30, "input_cost_per_audio_token": 0.0000007, "input_cost_per_token": 0.00000125, - "input_cost_per_token_above_128k_tokens": 0.0000025, + "input_cost_per_token_above_200k_tokens": 0.0000025, "output_cost_per_token": 0.0000010, - "output_cost_per_token_above_128k_tokens": 0.000015, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "gemini", "mode": "chat", "rpm": 10000, @@ -4884,6 +4913,8 @@ "supports_response_schema": true, "supports_audio_output": false, "supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview" }, "gemini/gemini-2.0-flash-exp": { @@ -4919,6 +4950,8 @@ "supports_audio_output": true, "tpm": 4000000, "rpm": 10, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, @@ -4945,6 +4978,8 @@ "supports_response_schema": true, "supports_audio_output": false, "supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite" }, "gemini/gemini-2.0-flash-thinking-exp": { @@ -4980,6 +5015,8 @@ "supports_audio_output": true, "tpm": 4000000, "rpm": 10, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, @@ -5016,6 +5053,8 @@ "supports_audio_output": true, "tpm": 4000000, "rpm": 10, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, diff --git a/pyproject.toml b/pyproject.toml index c67d7da6c5..bde347f860 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.65.4" +version = "1.65.5" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -118,7 +118,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.65.4" +version = "1.65.5" version_files = [ "pyproject.toml:^version" ] diff --git a/tests/batches_tests/test_openai_batches_and_files.py b/tests/batches_tests/test_openai_batches_and_files.py index 4669a2def6..b2826419e8 100644 --- a/tests/batches_tests/test_openai_batches_and_files.py +++ b/tests/batches_tests/test_openai_batches_and_files.py @@ -423,25 +423,35 @@ mock_vertex_batch_response = { @pytest.mark.asyncio -async def test_avertex_batch_prediction(): - with patch( +async def test_avertex_batch_prediction(monkeypatch): + monkeypatch.setenv("GCS_BUCKET_NAME", "litellm-local") + from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler + + client = AsyncHTTPHandler() + + async def mock_side_effect(*args, **kwargs): + 
print("args", args, "kwargs", kwargs) + url = kwargs.get("url", "") + if "files" in url: + mock_response.json.return_value = mock_file_response + elif "batch" in url: + mock_response.json.return_value = mock_vertex_batch_response + mock_response.status_code = 200 + return mock_response + + with patch.object( + client, "post", side_effect=mock_side_effect + ) as mock_post, patch( "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post" - ) as mock_post: + ) as mock_global_post: # Configure mock responses mock_response = MagicMock() mock_response.raise_for_status.return_value = None # Set up different responses for different API calls - async def mock_side_effect(*args, **kwargs): - url = kwargs.get("url", "") - if "files" in url: - mock_response.json.return_value = mock_file_response - elif "batch" in url: - mock_response.json.return_value = mock_vertex_batch_response - mock_response.status_code = 200 - return mock_response - + mock_post.side_effect = mock_side_effect + mock_global_post.side_effect = mock_side_effect # load_vertex_ai_credentials() litellm.set_verbose = True @@ -455,6 +465,7 @@ async def test_avertex_batch_prediction(): file=open(file_path, "rb"), purpose="batch", custom_llm_provider="vertex_ai", + client=client ) print("Response from creating file=", file_obj) diff --git a/tests/litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py b/tests/litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py new file mode 100644 index 0000000000..8f8f043935 --- /dev/null +++ b/tests/litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py @@ -0,0 +1,50 @@ +import json +import os +import sys + +import pytest +from fastapi.testclient import TestClient + +import litellm +from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import ( + StandardBuiltInToolCostTracking, +) +from litellm.types.llms.openai import FileSearchTool, WebSearchOptions +from litellm.types.utils import ModelInfo, ModelResponse, StandardBuiltInToolsParams + +sys.path.insert( + 0, os.path.abspath("../../..") +) # Adds the parent directory to the system path + +from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token +from litellm.types.utils import Usage + + +def test_generic_cost_per_token_above_200k_tokens(): + model = "gemini-2.5-pro-exp-03-25" + custom_llm_provider = "vertex_ai" + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + + model_cost_map = litellm.model_cost[model] + prompt_tokens = 220 * 1e6 + completion_tokens = 150 + usage = Usage( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + ) + prompt_cost, completion_cost = generic_cost_per_token( + model=model, + usage=usage, + custom_llm_provider=custom_llm_provider, + ) + assert round(prompt_cost, 10) == round( + model_cost_map["input_cost_per_token_above_200k_tokens"] * usage.prompt_tokens, + 10, + ) + assert round(completion_cost, 10) == round( + model_cost_map["output_cost_per_token_above_200k_tokens"] + * usage.completion_tokens, + 10, + ) diff --git a/tests/litellm/proxy/management_endpoints/test_ui_sso.py b/tests/litellm/proxy/management_endpoints/test_ui_sso.py index 7ad520f7d5..606f3833be 100644 --- a/tests/litellm/proxy/management_endpoints/test_ui_sso.py +++ b/tests/litellm/proxy/management_endpoints/test_ui_sso.py @@ -1,3 +1,4 @@ +import asyncio import json import os import sys @@ -5,15 +6,23 @@ from typing import Optional, cast from 
unittest.mock import MagicMock, patch import pytest +from fastapi import Request from fastapi.testclient import TestClient sys.path.insert( - 0, os.path.abspath("../../..") + 0, os.path.abspath("../../../") ) # Adds the parent directory to the system path from litellm.proxy.auth.handle_jwt import JWTHandler from litellm.proxy.management_endpoints.types import CustomOpenID -from litellm.proxy.management_endpoints.ui_sso import MicrosoftSSOHandler +from litellm.proxy.management_endpoints.ui_sso import ( + GoogleSSOHandler, + MicrosoftSSOHandler, +) +from litellm.types.proxy.management_endpoints.ui_sso import ( + MicrosoftGraphAPIUserGroupDirectoryObject, + MicrosoftGraphAPIUserGroupResponse, +) def test_microsoft_sso_handler_openid_from_response(): @@ -27,23 +36,14 @@ def test_microsoft_sso_handler_openid_from_response(): "surname": "User", "some_other_field": "value", } - - # Create a mock JWTHandler that returns predetermined team IDs - mock_jwt_handler = MagicMock(spec=JWTHandler) expected_team_ids = ["team1", "team2"] - mock_jwt_handler.get_team_ids_from_jwt.return_value = expected_team_ids - # Act # Call the method being tested result = MicrosoftSSOHandler.openid_from_response( - response=mock_response, jwt_handler=mock_jwt_handler + response=mock_response, team_ids=expected_team_ids ) # Assert - # Verify the JWT handler was called with the correct parameters - mock_jwt_handler.get_team_ids_from_jwt.assert_called_once_with( - cast(dict, mock_response) - ) # Check that the result is a CustomOpenID object with the expected values assert isinstance(result, CustomOpenID) @@ -59,13 +59,9 @@ def test_microsoft_sso_handler_openid_from_response(): def test_microsoft_sso_handler_with_empty_response(): # Arrange # Test with None response - mock_jwt_handler = MagicMock(spec=JWTHandler) - mock_jwt_handler.get_team_ids_from_jwt.return_value = [] # Act - result = MicrosoftSSOHandler.openid_from_response( - response=None, jwt_handler=mock_jwt_handler - ) + result = MicrosoftSSOHandler.openid_from_response(response=None, team_ids=[]) # Assert assert isinstance(result, CustomOpenID) @@ -77,5 +73,309 @@ def test_microsoft_sso_handler_with_empty_response(): assert result.last_name is None assert result.team_ids == [] - # Make sure the JWT handler was called with an empty dict - mock_jwt_handler.get_team_ids_from_jwt.assert_called_once_with({}) + +def test_get_microsoft_callback_response(): + # Arrange + mock_request = MagicMock(spec=Request) + mock_response = { + "mail": "microsoft_user@example.com", + "displayName": "Microsoft User", + "id": "msft123", + "givenName": "Microsoft", + "surname": "User", + } + + future = asyncio.Future() + future.set_result(mock_response) + + with patch.dict( + os.environ, + {"MICROSOFT_CLIENT_SECRET": "mock_secret", "MICROSOFT_TENANT": "mock_tenant"}, + ): + with patch( + "fastapi_sso.sso.microsoft.MicrosoftSSO.verify_and_process", + return_value=future, + ): + # Act + result = asyncio.run( + MicrosoftSSOHandler.get_microsoft_callback_response( + request=mock_request, + microsoft_client_id="mock_client_id", + redirect_url="http://mock_redirect_url", + ) + ) + + # Assert + assert isinstance(result, CustomOpenID) + assert result.email == "microsoft_user@example.com" + assert result.display_name == "Microsoft User" + assert result.provider == "microsoft" + assert result.id == "msft123" + assert result.first_name == "Microsoft" + assert result.last_name == "User" + + +def test_get_microsoft_callback_response_raw_sso_response(): + # Arrange + mock_request = 
MagicMock(spec=Request) + mock_response = { + "mail": "microsoft_user@example.com", + "displayName": "Microsoft User", + "id": "msft123", + "givenName": "Microsoft", + "surname": "User", + } + + future = asyncio.Future() + future.set_result(mock_response) + with patch.dict( + os.environ, + {"MICROSOFT_CLIENT_SECRET": "mock_secret", "MICROSOFT_TENANT": "mock_tenant"}, + ): + with patch( + "fastapi_sso.sso.microsoft.MicrosoftSSO.verify_and_process", + return_value=future, + ): + # Act + result = asyncio.run( + MicrosoftSSOHandler.get_microsoft_callback_response( + request=mock_request, + microsoft_client_id="mock_client_id", + redirect_url="http://mock_redirect_url", + return_raw_sso_response=True, + ) + ) + + # Assert + print("result from verify_and_process", result) + assert isinstance(result, dict) + assert result["mail"] == "microsoft_user@example.com" + assert result["displayName"] == "Microsoft User" + assert result["id"] == "msft123" + assert result["givenName"] == "Microsoft" + assert result["surname"] == "User" + + +def test_get_google_callback_response(): + # Arrange + mock_request = MagicMock(spec=Request) + mock_response = { + "email": "google_user@example.com", + "name": "Google User", + "sub": "google123", + "given_name": "Google", + "family_name": "User", + } + + future = asyncio.Future() + future.set_result(mock_response) + + with patch.dict(os.environ, {"GOOGLE_CLIENT_SECRET": "mock_secret"}): + with patch( + "fastapi_sso.sso.google.GoogleSSO.verify_and_process", return_value=future + ): + # Act + result = asyncio.run( + GoogleSSOHandler.get_google_callback_response( + request=mock_request, + google_client_id="mock_client_id", + redirect_url="http://mock_redirect_url", + ) + ) + + # Assert + assert isinstance(result, dict) + assert result.get("email") == "google_user@example.com" + assert result.get("name") == "Google User" + assert result.get("sub") == "google123" + assert result.get("given_name") == "Google" + assert result.get("family_name") == "User" + + +@pytest.mark.asyncio +async def test_get_user_groups_from_graph_api(): + # Arrange + mock_response = { + "@odata.context": "https://graph.microsoft.com/v1.0/$metadata#directoryObjects", + "value": [ + { + "@odata.type": "#microsoft.graph.group", + "id": "group1", + "displayName": "Group 1", + }, + { + "@odata.type": "#microsoft.graph.group", + "id": "group2", + "displayName": "Group 2", + }, + ], + } + + async def mock_get(*args, **kwargs): + mock = MagicMock() + mock.json.return_value = mock_response + return mock + + with patch( + "litellm.proxy.management_endpoints.ui_sso.get_async_httpx_client" + ) as mock_client: + mock_client.return_value = MagicMock() + mock_client.return_value.get = mock_get + + # Act + result = await MicrosoftSSOHandler.get_user_groups_from_graph_api( + access_token="mock_token" + ) + + # Assert + assert isinstance(result, list) + assert len(result) == 2 + assert "group1" in result + assert "group2" in result + + +@pytest.mark.asyncio +async def test_get_user_groups_pagination(): + # Arrange + first_response = { + "@odata.context": "https://graph.microsoft.com/v1.0/$metadata#directoryObjects", + "@odata.nextLink": "https://graph.microsoft.com/v1.0/me/memberOf?$skiptoken=page2", + "value": [ + { + "@odata.type": "#microsoft.graph.group", + "id": "group1", + "displayName": "Group 1", + }, + ], + } + second_response = { + "@odata.context": "https://graph.microsoft.com/v1.0/$metadata#directoryObjects", + "value": [ + { + "@odata.type": "#microsoft.graph.group", + "id": "group2", + "displayName": 
"Group 2", + }, + ], + } + + responses = [first_response, second_response] + current_response = {"index": 0} + + async def mock_get(*args, **kwargs): + mock = MagicMock() + mock.json.return_value = responses[current_response["index"]] + current_response["index"] += 1 + return mock + + with patch( + "litellm.proxy.management_endpoints.ui_sso.get_async_httpx_client" + ) as mock_client: + mock_client.return_value = MagicMock() + mock_client.return_value.get = mock_get + + # Act + result = await MicrosoftSSOHandler.get_user_groups_from_graph_api( + access_token="mock_token" + ) + + # Assert + assert isinstance(result, list) + assert len(result) == 2 + assert "group1" in result + assert "group2" in result + assert current_response["index"] == 2 # Verify both pages were fetched + + +@pytest.mark.asyncio +async def test_get_user_groups_empty_response(): + # Arrange + mock_response = { + "@odata.context": "https://graph.microsoft.com/v1.0/$metadata#directoryObjects", + "value": [], + } + + async def mock_get(*args, **kwargs): + mock = MagicMock() + mock.json.return_value = mock_response + return mock + + with patch( + "litellm.proxy.management_endpoints.ui_sso.get_async_httpx_client" + ) as mock_client: + mock_client.return_value = MagicMock() + mock_client.return_value.get = mock_get + + # Act + result = await MicrosoftSSOHandler.get_user_groups_from_graph_api( + access_token="mock_token" + ) + + # Assert + assert isinstance(result, list) + assert len(result) == 0 + + +@pytest.mark.asyncio +async def test_get_user_groups_error_handling(): + # Arrange + async def mock_get(*args, **kwargs): + raise Exception("API Error") + + with patch( + "litellm.proxy.management_endpoints.ui_sso.get_async_httpx_client" + ) as mock_client: + mock_client.return_value = MagicMock() + mock_client.return_value.get = mock_get + + # Act + result = await MicrosoftSSOHandler.get_user_groups_from_graph_api( + access_token="mock_token" + ) + + # Assert + assert isinstance(result, list) + assert len(result) == 0 + + +def test_get_group_ids_from_graph_api_response(): + # Arrange + mock_response = MicrosoftGraphAPIUserGroupResponse( + odata_context="https://graph.microsoft.com/v1.0/$metadata#directoryObjects", + odata_nextLink=None, + value=[ + MicrosoftGraphAPIUserGroupDirectoryObject( + odata_type="#microsoft.graph.group", + id="group1", + displayName="Group 1", + description=None, + deletedDateTime=None, + roleTemplateId=None, + ), + MicrosoftGraphAPIUserGroupDirectoryObject( + odata_type="#microsoft.graph.group", + id="group2", + displayName="Group 2", + description=None, + deletedDateTime=None, + roleTemplateId=None, + ), + MicrosoftGraphAPIUserGroupDirectoryObject( + odata_type="#microsoft.graph.group", + id=None, # Test handling of None id + displayName="Invalid Group", + description=None, + deletedDateTime=None, + roleTemplateId=None, + ), + ], + ) + + # Act + result = MicrosoftSSOHandler._get_group_ids_from_graph_api_response(mock_response) + + # Assert + assert isinstance(result, list) + assert len(result) == 2 + assert "group1" in result + assert "group2" in result diff --git a/tests/litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py b/tests/litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py new file mode 100644 index 0000000000..43d4dd9cd8 --- /dev/null +++ b/tests/litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py @@ -0,0 +1,116 @@ +import json +import os +import sys +from io import BytesIO +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx 
+import pytest +from fastapi import Request, UploadFile +from fastapi.testclient import TestClient +from starlette.datastructures import Headers +from starlette.datastructures import UploadFile as StarletteUploadFile + +sys.path.insert( + 0, os.path.abspath("../../..") +) # Adds the parent directory to the system path + +from litellm.proxy.pass_through_endpoints.pass_through_endpoints import ( + HttpPassThroughEndpointHelpers, +) + + +# Test is_multipart +def test_is_multipart(): + # Test with multipart content type + request = MagicMock(spec=Request) + request.headers = Headers({"content-type": "multipart/form-data; boundary=123"}) + assert HttpPassThroughEndpointHelpers.is_multipart(request) is True + + # Test with non-multipart content type + request.headers = Headers({"content-type": "application/json"}) + assert HttpPassThroughEndpointHelpers.is_multipart(request) is False + + # Test with no content type + request.headers = Headers({}) + assert HttpPassThroughEndpointHelpers.is_multipart(request) is False + + +# Test _build_request_files_from_upload_file +@pytest.mark.asyncio +async def test_build_request_files_from_upload_file(): + # Test with FastAPI UploadFile + file_content = b"test content" + file = BytesIO(file_content) + # Create SpooledTemporaryFile with content type headers + headers = {"content-type": "text/plain"} + upload_file = UploadFile(file=file, filename="test.txt", headers=headers) + upload_file.read = AsyncMock(return_value=file_content) + + result = await HttpPassThroughEndpointHelpers._build_request_files_from_upload_file( + upload_file + ) + assert result == ("test.txt", file_content, "text/plain") + + # Test with Starlette UploadFile + file2 = BytesIO(file_content) + starlette_file = StarletteUploadFile( + file=file2, + filename="test2.txt", + headers=Headers({"content-type": "text/plain"}), + ) + starlette_file.read = AsyncMock(return_value=file_content) + + result = await HttpPassThroughEndpointHelpers._build_request_files_from_upload_file( + starlette_file + ) + assert result == ("test2.txt", file_content, "text/plain") + + +# Test make_multipart_http_request +@pytest.mark.asyncio +async def test_make_multipart_http_request(): + # Mock request with file and form field + request = MagicMock(spec=Request) + request.method = "POST" + + # Mock form data + file_content = b"test file content" + file = BytesIO(file_content) + # Create SpooledTemporaryFile with content type headers + headers = {"content-type": "text/plain"} + upload_file = UploadFile(file=file, filename="test.txt", headers=headers) + upload_file.read = AsyncMock(return_value=file_content) + + form_data = {"file": upload_file, "text_field": "test value"} + request.form = AsyncMock(return_value=form_data) + + # Mock httpx client + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {} + + async_client = MagicMock() + async_client.request = AsyncMock(return_value=mock_response) + + # Test the function + response = await HttpPassThroughEndpointHelpers.make_multipart_http_request( + request=request, + async_client=async_client, + url=httpx.URL("http://test.com"), + headers={}, + requested_query_params=None, + ) + + # Verify the response + assert response == mock_response + + # Verify the client call + async_client.request.assert_called_once() + call_args = async_client.request.call_args[1] + + assert call_args["method"] == "POST" + assert str(call_args["url"]) == "http://test.com" + assert isinstance(call_args["files"], dict) + assert isinstance(call_args["data"], dict) 
+ assert call_args["data"]["text_field"] == "test value" diff --git a/tests/litellm/proxy/test_proxy_server.py b/tests/litellm/proxy/test_proxy_server.py index 1c05e80012..919a00d670 100644 --- a/tests/litellm/proxy/test_proxy_server.py +++ b/tests/litellm/proxy/test_proxy_server.py @@ -162,3 +162,30 @@ async def test_aaaproxy_startup_master_key(mock_prisma, monkeypatch, tmp_path): from litellm.proxy.proxy_server import master_key assert master_key == test_resolved_key + + +def test_team_info_masking(): + """ + Test that sensitive team information is properly masked + + Ref: https://huntr.com/bounties/661b388a-44d8-4ad5-862b-4dc5b80be30a + """ + from litellm.proxy.proxy_server import ProxyConfig + + proxy_config = ProxyConfig() + # Test team object with sensitive data + team1_info = { + "success_callback": "['langfuse', 's3']", + "langfuse_secret": "secret-test-key", + "langfuse_public_key": "public-test-key", + } + + with pytest.raises(Exception) as exc_info: + proxy_config._get_team_config( + team_id="test_dev", + all_teams_config=[team1_info], + ) + + print("Got exception: {}".format(exc_info.value)) + assert "secret-test-key" not in str(exc_info.value) + assert "public-test-key" not in str(exc_info.value) diff --git a/tests/litellm/proxy/types_utils/test_utils.py b/tests/litellm/proxy/types_utils/test_utils.py new file mode 100644 index 0000000000..5685489bfc --- /dev/null +++ b/tests/litellm/proxy/types_utils/test_utils.py @@ -0,0 +1,72 @@ +import json +import os +import sys + +import pytest +from fastapi.testclient import TestClient + +from litellm.proxy.types_utils.utils import security_checks + +sys.path.insert( + 0, os.path.abspath("../../..") +) # Adds the parent directory to the system path + + +def test_security_checks_blocks_dangerous_modules(): + """ + Resolves: https://huntr.com/bounties/1d98bebb-6cf4-46c9-87c3-d3b1972973b5 + + This test checks if the security_checks function correctly blocks the import of dangerous modules. 
+ """ + dangerous_module = "/usr/lib/python3/os.system" + with pytest.raises(ImportError) as exc_info: + security_checks(dangerous_module) + + assert "not allowed for security reasons" in str(exc_info.value) + assert dangerous_module in str(exc_info.value) + + +def test_security_checks_various_dangerous_modules(): + dangerous_modules = [ + "subprocess.run", + "socket.socket", + "pickle.loads", + "marshal.loads", + "ctypes.CDLL", + "builtins.eval", + "__builtin__.exec", + "shutil.rmtree", + "multiprocessing.Process", + "threading.Thread", + ] + + for module in dangerous_modules: + with pytest.raises(ImportError) as exc_info: + security_checks(module) + assert "not allowed for security reasons" in str(exc_info.value) + assert module in str(exc_info.value) + + +def test_security_checks_case_insensitive(): + # Test that the check is case-insensitive + variations = ["OS.system", "os.System", "Os.SyStEm", "SUBPROCESS.run"] + + for module in variations: + with pytest.raises(ImportError) as exc_info: + security_checks(module) + assert "not allowed for security reasons" in str(exc_info.value) + + +def test_security_checks_nested_paths(): + # Test nested paths that contain dangerous modules + nested_paths = [ + "some/path/to/os/system", + "myproject/utils/subprocess_wrapper", + "lib/helpers/socket_utils", + "../../../system/os.py", + ] + + for path in nested_paths: + with pytest.raises(ImportError) as exc_info: + security_checks(path) + assert "not allowed for security reasons" in str(exc_info.value) diff --git a/tests/llm_translation/test_huggingface_chat_completion.py b/tests/llm_translation/test_huggingface_chat_completion.py index 9f1e89aeb1..7d498b96df 100644 --- a/tests/llm_translation/test_huggingface_chat_completion.py +++ b/tests/llm_translation/test_huggingface_chat_completion.py @@ -323,7 +323,8 @@ class TestHuggingFace(BaseLLMChatTest): model="huggingface/fireworks-ai/meta-llama/Meta-Llama-3-8B-Instruct", messages=[{"role": "user", "content": "Hello"}], optional_params={}, - api_key="test_api_key" + api_key="test_api_key", + litellm_params={} ) assert headers["Authorization"] == "Bearer test_api_key" diff --git a/tests/llm_translation/test_vertex.py b/tests/llm_translation/test_vertex.py index d821fb415e..9118d94a6f 100644 --- a/tests/llm_translation/test_vertex.py +++ b/tests/llm_translation/test_vertex.py @@ -141,6 +141,7 @@ def test_build_vertex_schema(): [ ([{"googleSearch": {}}], "googleSearch"), ([{"googleSearchRetrieval": {}}], "googleSearchRetrieval"), + ([{"enterpriseWebSearch": {}}], "enterpriseWebSearch"), ([{"code_execution": {}}], "code_execution"), ], ) diff --git a/tests/local_testing/example.jsonl b/tests/local_testing/example.jsonl new file mode 100644 index 0000000000..fc3ca40808 --- /dev/null +++ b/tests/local_testing/example.jsonl @@ -0,0 +1,2 @@ +{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gemini-1.5-flash-001", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello world!"}], "max_tokens": 10}} +{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gemini-1.5-flash-001", "messages": [{"role": "system", "content": "You are an unhelpful assistant."}, {"role": "user", "content": "Hello world!"}], "max_tokens": 10}} diff --git a/tests/local_testing/test_gcs_bucket.py b/tests/local_testing/test_gcs_bucket.py index 1a8deed8a8..b64475c227 100644 --- a/tests/local_testing/test_gcs_bucket.py +++ 
b/tests/local_testing/test_gcs_bucket.py
@@ -21,7 +21,7 @@ from litellm.integrations.gcs_bucket.gcs_bucket import (
     StandardLoggingPayload,
 )
 from litellm.types.utils import StandardCallbackDynamicParams
-
+from unittest.mock import patch
 
 verbose_logger.setLevel(logging.DEBUG)
 
@@ -687,3 +687,63 @@ async def test_basic_gcs_logger_with_folder_in_bucket_name():
     # clean up
     if old_bucket_name is not None:
         os.environ["GCS_BUCKET_NAME"] = old_bucket_name
+
+@pytest.mark.skip(reason="This test is flaky on ci/cd")
+def test_create_file_e2e():
+    """
+    Asserts 'create_file' is called with the correct arguments
+    """
+    load_vertex_ai_credentials()
+    test_file_content = b"test audio content"
+    test_file = ("test.wav", test_file_content, "audio/wav")
+
+    from litellm import create_file
+    response = create_file(
+        file=test_file,
+        purpose="user_data",
+        custom_llm_provider="vertex_ai",
+    )
+    print("response", response)
+    assert response is not None
+
+@pytest.mark.skip(reason="This test is flaky on ci/cd")
+def test_create_file_e2e_jsonl():
+    """
+    Asserts 'create_file' is called with the correct arguments
+    """
+    load_vertex_ai_credentials()
+    from litellm.llms.custom_httpx.http_handler import HTTPHandler
+
+    client = HTTPHandler()
+
+    example_jsonl = [{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gemini-1.5-flash-001", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 10}},{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gemini-1.5-flash-001", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 10}}]
+
+    # Create and write to the file
+    file_path = "example.jsonl"
+    with open(file_path, "w") as f:
+        for item in example_jsonl:
+            f.write(json.dumps(item) + "\n")
+
+    # Verify file content
+    with open(file_path, "r") as f:
+        content = f.read()
+        print("File content:", content)
+        assert len(content) > 0, "File is empty"
+
+    from litellm import create_file
+    with patch.object(client, "post") as mock_create_file:
+        try:
+            response = create_file(
+                file=open(file_path, "rb"),
+                purpose="user_data",
+                custom_llm_provider="vertex_ai",
+                client=client,
+            )
+        except Exception as e:
+            print("error", e)
+
+        mock_create_file.assert_called_once()
+
+        print(f"kwargs: {mock_create_file.call_args.kwargs}")
+
+        assert mock_create_file.call_args.kwargs["data"] is not None and len(mock_create_file.call_args.kwargs["data"]) > 0
\ No newline at end of file
diff --git a/tests/local_testing/test_get_model_info.py b/tests/local_testing/test_get_model_info.py
index f6fd790921..a8c19fde5a 100644
--- a/tests/local_testing/test_get_model_info.py
+++ b/tests/local_testing/test_get_model_info.py
@@ -431,6 +431,7 @@
         "input_cost_per_character_above_128k_tokens": {"type": "number"},
         "input_cost_per_image": {"type": "number"},
         "input_cost_per_image_above_128k_tokens": {"type": "number"},
+        "input_cost_per_token_above_200k_tokens": {"type": "number"},
         "input_cost_per_pixel": {"type": "number"},
         "input_cost_per_query": {"type": "number"},
         "input_cost_per_request": {"type": "number"},
@@ -483,6 +484,7 @@
         "output_cost_per_second": {"type": "number"},
         "output_cost_per_token": {"type": "number"},
         "output_cost_per_token_above_128k_tokens": {"type": "number"},
+        "output_cost_per_token_above_200k_tokens": {"type": "number"},
         "output_cost_per_token_batches": {"type": "number"},
         "output_db_cost_per_token": {"type": "number"},
         "output_dbu_cost_per_token": {"type": "number"},
@@ -541,6 +543,13 @@
                 "enum": ["text", "audio", "image", "video"],
             },
         },
+        "supported_output_modalities": {
+            "type": "array",
+            "items": {
+                "type": "string",
+                "enum": ["text", "image"],
+            },
+        },
         "supports_native_streaming": {"type": "boolean"},
     },
     "additionalProperties": False,
diff --git a/tests/pass_through_tests/test_openai_assistants_passthrough.py b/tests/pass_through_tests/test_openai_assistants_passthrough.py
index 694d3c090e..40361ab39f 100644
--- a/tests/pass_through_tests/test_openai_assistants_passthrough.py
+++ b/tests/pass_through_tests/test_openai_assistants_passthrough.py
@@ -2,14 +2,31 @@
 import pytest
 import openai
 import aiohttp
 import asyncio
+import tempfile
 from typing_extensions import override
 from openai import AssistantEventHandler
+
 client = openai.OpenAI(base_url="http://0.0.0.0:4000/openai", api_key="sk-1234")
 
+def test_pass_through_file_operations():
+    # Create a temporary file
+    with tempfile.NamedTemporaryFile(mode='w+', suffix='.txt', delete=False) as temp_file:
+        temp_file.write("This is a test file for the OpenAI Assistants API.")
+        temp_file.flush()
+
+        # create a file
+        file = client.files.create(
+            file=open(temp_file.name, "rb"),
+            purpose="assistants",
+        )
+        print("file created", file)
+
+        # delete the file
+        delete_file = client.files.delete(file.id)
+        print("file deleted", delete_file)
 
 def test_openai_assistants_e2e_operations():
-
     assistant = client.beta.assistants.create(
         name="Math Tutor",
         instructions="You are a personal math tutor. Write and run code to answer math questions.",
diff --git a/ui/litellm-dashboard/package-lock.json b/ui/litellm-dashboard/package-lock.json
index 39ab75d8c7..960b9e865b 100644
--- a/ui/litellm-dashboard/package-lock.json
+++ b/ui/litellm-dashboard/package-lock.json
@@ -21,7 +21,7 @@
         "jsonwebtoken": "^9.0.2",
         "jwt-decode": "^4.0.0",
         "moment": "^2.30.1",
-        "next": "^14.2.25",
+        "next": "^14.2.26",
         "openai": "^4.28.0",
         "papaparse": "^5.5.2",
         "react": "^18",
@@ -418,9 +418,9 @@
       }
     },
     "node_modules/@next/env": {
-      "version": "14.2.25",
-      "resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.25.tgz",
-      "integrity": "sha512-JnzQ2cExDeG7FxJwqAksZ3aqVJrHjFwZQAEJ9gQZSoEhIow7SNoKZzju/AwQ+PLIR4NY8V0rhcVozx/2izDO0w==",
+      "version": "14.2.26",
+      "resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.26.tgz",
+      "integrity": "sha512-vO//GJ/YBco+H7xdQhzJxF7ub3SUwft76jwaeOyVVQFHCi5DCnkP16WHB+JBylo4vOKPoZBlR94Z8xBxNBdNJA==",
       "license": "MIT"
     },
     "node_modules/@next/eslint-plugin-next": {
@@ -433,9 +433,9 @@
       }
     },
     "node_modules/@next/swc-darwin-arm64": {
-      "version": "14.2.25",
-      "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.25.tgz",
-      "integrity": "sha512-09clWInF1YRd6le00vt750s3m7SEYNehz9C4PUcSu3bAdCTpjIV4aTYQZ25Ehrr83VR1rZeqtKUPWSI7GfuKZQ==",
+      "version": "14.2.26",
+      "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.26.tgz",
+      "integrity": "sha512-zDJY8gsKEseGAxG+C2hTMT0w9Nk9N1Sk1qV7vXYz9MEiyRoF5ogQX2+vplyUMIfygnjn9/A04I6yrUTRTuRiyQ==",
       "cpu": [
         "arm64"
       ],
@@ -449,9 +449,9 @@
       }
     },
     "node_modules/@next/swc-darwin-x64": {
-      "version": "14.2.25",
-      "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.25.tgz",
-      "integrity": "sha512-V+iYM/QR+aYeJl3/FWWU/7Ix4b07ovsQ5IbkwgUK29pTHmq+5UxeDr7/dphvtXEq5pLB/PucfcBNh9KZ8vWbug==",
+      "version": "14.2.26",
+      "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.26.tgz",
+      "integrity": "sha512-U0adH5ryLfmTDkahLwG9sUQG2L0a9rYux8crQeC92rPhi3jGQEY47nByQHrVrt3prZigadwj/2HZ1LUUimuSbg==",
       "cpu": [
         "x64"
       ],
@@ -465,9 +465,9 @@
       }
     },
     "node_modules/@next/swc-linux-arm64-gnu": {
-      "version": "14.2.25",
-      "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.25.tgz",
-      "integrity": "sha512-LFnV2899PJZAIEHQ4IMmZIgL0FBieh5keMnriMY1cK7ompR+JUd24xeTtKkcaw8QmxmEdhoE5Mu9dPSuDBgtTg==",
+      "version": "14.2.26",
+      "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.26.tgz",
+      "integrity": "sha512-SINMl1I7UhfHGM7SoRiw0AbwnLEMUnJ/3XXVmhyptzriHbWvPPbbm0OEVG24uUKhuS1t0nvN/DBvm5kz6ZIqpg==",
       "cpu": [
         "arm64"
       ],
@@ -481,9 +481,9 @@
       }
     },
     "node_modules/@next/swc-linux-arm64-musl": {
-      "version": "14.2.25",
-      "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.25.tgz",
-      "integrity": "sha512-QC5y5PPTmtqFExcKWKYgUNkHeHE/z3lUsu83di488nyP0ZzQ3Yse2G6TCxz6nNsQwgAx1BehAJTZez+UQxzLfw==",
+      "version": "14.2.26",
+      "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.26.tgz",
+      "integrity": "sha512-s6JaezoyJK2DxrwHWxLWtJKlqKqTdi/zaYigDXUJ/gmx/72CrzdVZfMvUc6VqnZ7YEvRijvYo+0o4Z9DencduA==",
       "cpu": [
         "arm64"
       ],
@@ -497,9 +497,9 @@
       }
     },
     "node_modules/@next/swc-linux-x64-gnu": {
-      "version": "14.2.25",
-      "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.25.tgz",
-      "integrity": "sha512-y6/ML4b9eQ2D/56wqatTJN5/JR8/xdObU2Fb1RBidnrr450HLCKr6IJZbPqbv7NXmje61UyxjF5kvSajvjye5w==",
+ "version": "14.2.26", + "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.26.tgz", + "integrity": "sha512-FEXeUQi8/pLr/XI0hKbe0tgbLmHFRhgXOUiPScz2hk0hSmbGiU8aUqVslj/6C6KA38RzXnWoJXo4FMo6aBxjzg==", "cpu": [ "x64" ], @@ -513,9 +513,9 @@ } }, "node_modules/@next/swc-linux-x64-musl": { - "version": "14.2.25", - "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.25.tgz", - "integrity": "sha512-sPX0TSXHGUOZFvv96GoBXpB3w4emMqKeMgemrSxI7A6l55VBJp/RKYLwZIB9JxSqYPApqiREaIIap+wWq0RU8w==", + "version": "14.2.26", + "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.26.tgz", + "integrity": "sha512-BUsomaO4d2DuXhXhgQCVt2jjX4B4/Thts8nDoIruEJkhE5ifeQFtvW5c9JkdOtYvE5p2G0hcwQ0UbRaQmQwaVg==", "cpu": [ "x64" ], @@ -529,9 +529,9 @@ } }, "node_modules/@next/swc-win32-arm64-msvc": { - "version": "14.2.25", - "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.25.tgz", - "integrity": "sha512-ReO9S5hkA1DU2cFCsGoOEp7WJkhFzNbU/3VUF6XxNGUCQChyug6hZdYL/istQgfT/GWE6PNIg9cm784OI4ddxQ==", + "version": "14.2.26", + "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.26.tgz", + "integrity": "sha512-5auwsMVzT7wbB2CZXQxDctpWbdEnEW/e66DyXO1DcgHxIyhP06awu+rHKshZE+lPLIGiwtjo7bsyeuubewwxMw==", "cpu": [ "arm64" ], @@ -545,9 +545,9 @@ } }, "node_modules/@next/swc-win32-ia32-msvc": { - "version": "14.2.25", - "resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.25.tgz", - "integrity": "sha512-DZ/gc0o9neuCDyD5IumyTGHVun2dCox5TfPQI/BJTYwpSNYM3CZDI4i6TOdjeq1JMo+Ug4kPSMuZdwsycwFbAw==", + "version": "14.2.26", + "resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.26.tgz", + "integrity": "sha512-GQWg/Vbz9zUGi9X80lOeGsz1rMH/MtFO/XqigDznhhhTfDlDoynCM6982mPCbSlxJ/aveZcKtTlwfAjwhyxDpg==", "cpu": [ "ia32" ], @@ -561,9 +561,9 @@ } }, "node_modules/@next/swc-win32-x64-msvc": { - "version": "14.2.25", - "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.25.tgz", - "integrity": "sha512-KSznmS6eFjQ9RJ1nEc66kJvtGIL1iZMYmGEXsZPh2YtnLtqrgdVvKXJY2ScjjoFnG6nGLyPFR0UiEvDwVah4Tw==", + "version": "14.2.26", + "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.26.tgz", + "integrity": "sha512-2rdB3T1/Gp7bv1eQTTm9d1Y1sv9UuJ2LAwOE0Pe2prHKe32UNscj7YS13fRB37d0GAiGNR+Y7ZcW8YjDI8Ns0w==", "cpu": [ "x64" ], @@ -5011,12 +5011,12 @@ "dev": true }, "node_modules/next": { - "version": "14.2.25", - "resolved": "https://registry.npmjs.org/next/-/next-14.2.25.tgz", - "integrity": "sha512-N5M7xMc4wSb4IkPvEV5X2BRRXUmhVHNyaXwEM86+voXthSZz8ZiRyQW4p9mwAoAPIm6OzuVZtn7idgEJeAJN3Q==", + "version": "14.2.26", + "resolved": "https://registry.npmjs.org/next/-/next-14.2.26.tgz", + "integrity": "sha512-b81XSLihMwCfwiUVRRja3LphLo4uBBMZEzBBWMaISbKTwOmq3wPknIETy/8000tr7Gq4WmbuFYPS7jOYIf+ZJw==", "license": "MIT", "dependencies": { - "@next/env": "14.2.25", + "@next/env": "14.2.26", "@swc/helpers": "0.5.5", "busboy": "1.6.0", "caniuse-lite": "^1.0.30001579", @@ -5031,15 +5031,15 @@ "node": ">=18.17.0" }, "optionalDependencies": { - "@next/swc-darwin-arm64": "14.2.25", - "@next/swc-darwin-x64": "14.2.25", - "@next/swc-linux-arm64-gnu": "14.2.25", - "@next/swc-linux-arm64-musl": "14.2.25", - "@next/swc-linux-x64-gnu": "14.2.25", - "@next/swc-linux-x64-musl": "14.2.25", - "@next/swc-win32-arm64-msvc": "14.2.25", - 
"@next/swc-win32-ia32-msvc": "14.2.25", - "@next/swc-win32-x64-msvc": "14.2.25" + "@next/swc-darwin-arm64": "14.2.26", + "@next/swc-darwin-x64": "14.2.26", + "@next/swc-linux-arm64-gnu": "14.2.26", + "@next/swc-linux-arm64-musl": "14.2.26", + "@next/swc-linux-x64-gnu": "14.2.26", + "@next/swc-linux-x64-musl": "14.2.26", + "@next/swc-win32-arm64-msvc": "14.2.26", + "@next/swc-win32-ia32-msvc": "14.2.26", + "@next/swc-win32-x64-msvc": "14.2.26" }, "peerDependencies": { "@opentelemetry/api": "^1.1.0", diff --git a/ui/litellm-dashboard/package.json b/ui/litellm-dashboard/package.json index 895e2576cc..c951796020 100644 --- a/ui/litellm-dashboard/package.json +++ b/ui/litellm-dashboard/package.json @@ -22,7 +22,7 @@ "jsonwebtoken": "^9.0.2", "jwt-decode": "^4.0.0", "moment": "^2.30.1", - "next": "^14.2.25", + "next": "^14.2.26", "openai": "^4.28.0", "papaparse": "^5.5.2", "react": "^18", diff --git a/ui/litellm-dashboard/src/app/page.tsx b/ui/litellm-dashboard/src/app/page.tsx index 592c7bf0f2..df47c528dc 100644 --- a/ui/litellm-dashboard/src/app/page.tsx +++ b/ui/litellm-dashboard/src/app/page.tsx @@ -314,6 +314,8 @@ export default function CreateKeyPage() {