Add /vllm/* and /mistral/* passthrough endpoints (adds support for Mistral OCR via passthrough)

* feat(llm_passthrough_endpoints.py): support mistral passthrough

Closes https://github.com/BerriAI/litellm/issues/9051

* feat(llm_passthrough_endpoints.py): initial commit for adding vllm passthrough route

* feat(vllm/common_utils.py): add new vllm model info route

make it possible to use the vllm passthrough route via a factory function

* fix(llm_passthrough_endpoints.py): add all methods to vllm passthrough route

* fix: fix linting error

* fix: fix linting error

* fix: fix ruff check

* fix(proxy/_types.py): add new passthrough routes

* docs(config_settings.md): add mistral env vars to docs
Krish Dholakia 2025-04-14 22:06:33 -07:00 committed by GitHub
parent 8faf56922c
commit 9b0f871129
12 changed files with 450 additions and 176 deletions


@@ -6,6 +6,7 @@ Provider-specific Pass-Through Endpoints
Use litellm with Anthropic SDK, Vertex AI SDK, Cohere SDK, etc.
"""
import os
from typing import Optional
import httpx
@@ -43,6 +44,84 @@ def create_request_copy(request: Request):
    }
async def llm_passthrough_factory_proxy_route(
    custom_llm_provider: str,
    endpoint: str,
    request: Request,
    fastapi_response: Response,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Factory function for creating pass-through endpoints for LLM providers.
    """
    from litellm.types.utils import LlmProviders
    from litellm.utils import ProviderConfigManager

    provider_config = ProviderConfigManager.get_provider_model_info(
        provider=LlmProviders(custom_llm_provider),
        model=None,
    )
    if provider_config is None:
        raise HTTPException(
            status_code=404, detail=f"Provider {custom_llm_provider} not found"
        )

    base_target_url = provider_config.get_api_base()
    if base_target_url is None:
        raise HTTPException(
            status_code=404, detail=f"Provider {custom_llm_provider} api base not found"
        )

    encoded_endpoint = httpx.URL(endpoint).path
    # Ensure endpoint starts with '/' for proper URL construction
    if not encoded_endpoint.startswith("/"):
        encoded_endpoint = "/" + encoded_endpoint

    # Construct the full target URL using httpx
    base_url = httpx.URL(base_target_url)
    updated_url = base_url.copy_with(path=encoded_endpoint)

    # Resolve the provider API key via the passthrough credential router
    provider_api_key = passthrough_endpoint_router.get_credentials(
        custom_llm_provider=custom_llm_provider,
        region_name=None,
    )

    auth_headers = provider_config.validate_environment(
        headers={},
        model="",
        messages=[],
        optional_params={},
        litellm_params={},
        api_key=provider_api_key,
        api_base=base_target_url,
    )

    ## check for streaming
    is_streaming_request = False
    # a request is streaming when 'stream': true is set in the JSON body
    if request.method == "POST":
        _request_body = await request.json()
        if _request_body.get("stream"):
            is_streaming_request = True

    ## CREATE PASS-THROUGH
    endpoint_func = create_pass_through_route(
        endpoint=endpoint,
        target=str(updated_url),
        custom_headers=auth_headers,
    )  # dynamically construct pass-through endpoint based on incoming path
    received_value = await endpoint_func(
        request,
        fastapi_response,
        user_api_key_dict,
        stream=is_streaming_request,  # type: ignore
    )

    return received_value
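For anyone extending this, the factory only exercises two hooks on the provider's model-info config: `get_api_base()` and `validate_environment(...)`, both visible in the calls above. A minimal sketch of that contract, with every name beyond those two methods assumed rather than taken from the diff:

```python
import os
from typing import List, Optional


class ExampleProviderModelInfo:
    """Illustrative sketch only; not litellm's actual base class."""

    def get_api_base(self) -> Optional[str]:
        # The factory raises a 404 if this returns None.
        return os.getenv("EXAMPLE_PROVIDER_API_BASE")  # hypothetical env var

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[dict],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        # Return the auth headers the passthrough forwards upstream.
        if api_key is not None:
            headers["Authorization"] = f"Bearer {api_key}"
        return headers
```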
@router.api_route(
    "/gemini/{endpoint:path}",
    methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
@@ -162,6 +241,84 @@ async def cohere_proxy_route(
    return received_value
@router.api_route(
    "/vllm/{endpoint:path}",
    methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
    tags=["VLLM Pass-through", "pass-through"],
)
async def vllm_proxy_route(
    endpoint: str,
    request: Request,
    fastapi_response: Response,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    [Docs](https://docs.litellm.ai/docs/pass_through/vllm)
    """
    return await llm_passthrough_factory_proxy_route(
        endpoint=endpoint,
        request=request,
        fastapi_response=fastapi_response,
        user_api_key_dict=user_api_key_dict,
        custom_llm_provider="vllm",
    )
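A quick usage sketch of the new route, assuming a LiteLLM proxy on localhost:4000, a hypothetical proxy virtual key "sk-1234", and the vLLM api base configured via the provider's env var (assumed `VLLM_API_BASE` here) so `get_api_base()` can resolve it:

```python
import httpx

PROXY_BASE = "http://localhost:4000"  # hypothetical deployment
headers = {"Authorization": "Bearer sk-1234"}  # hypothetical virtual key

# All five HTTP methods are forwarded; this GET is proxied to
# {api_base}/v1/models on the upstream vLLM server.
resp = httpx.get(f"{PROXY_BASE}/vllm/v1/models", headers=headers)
print(resp.json())
```

POST bodies with `{"stream": true}` take the streaming branch shown in the factory above.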
@router.api_route(
    "/mistral/{endpoint:path}",
    methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
    tags=["Mistral Pass-through", "pass-through"],
)
async def mistral_proxy_route(
    endpoint: str,
    request: Request,
    fastapi_response: Response,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    [Docs](https://docs.litellm.ai/docs/anthropic_completion)
    """
    base_target_url = os.getenv("MISTRAL_API_BASE") or "https://api.mistral.ai"
    encoded_endpoint = httpx.URL(endpoint).path

    # Ensure endpoint starts with '/' for proper URL construction
    if not encoded_endpoint.startswith("/"):
        encoded_endpoint = "/" + encoded_endpoint

    # Construct the full target URL using httpx
    base_url = httpx.URL(base_target_url)
    updated_url = base_url.copy_with(path=encoded_endpoint)

    # Resolve the Mistral API key via the passthrough credential router
    mistral_api_key = passthrough_endpoint_router.get_credentials(
        custom_llm_provider="mistral",
        region_name=None,
    )

    ## check for streaming
    is_streaming_request = False
    # a request is streaming when 'stream': true is set in the JSON body
    if request.method == "POST":
        _request_body = await request.json()
        if _request_body.get("stream"):
            is_streaming_request = True

    ## CREATE PASS-THROUGH
    endpoint_func = create_pass_through_route(
        endpoint=endpoint,
        target=str(updated_url),
        custom_headers={"Authorization": "Bearer {}".format(mistral_api_key)},
    )  # dynamically construct pass-through endpoint based on incoming path
    received_value = await endpoint_func(
        request,
        fastapi_response,
        user_api_key_dict,
        stream=is_streaming_request,  # type: ignore
    )

    return received_value
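This is the route that enables the Mistral OCR use case from the PR title. A hedged example, assuming a LiteLLM proxy on localhost:4000 with `MISTRAL_API_KEY` set server-side, a hypothetical proxy virtual key "sk-1234", and a request body following Mistral's documented `/v1/ocr` schema at the time of writing:

```python
import httpx

resp = httpx.post(
    "http://localhost:4000/mistral/v1/ocr",  # forwarded to {MISTRAL_API_BASE}/v1/ocr
    headers={"Authorization": "Bearer sk-1234"},  # hypothetical virtual key
    json={
        "model": "mistral-ocr-latest",
        "document": {
            "type": "document_url",
            "document_url": "https://arxiv.org/pdf/2201.04234",  # example document
        },
    },
)
print(resp.json())
```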
@router.api_route(
    "/anthropic/{endpoint:path}",
    methods=["GET", "POST", "PUT", "DELETE", "PATCH"],