Add /vllm/* and /mistral/* passthrough endpoints (adds support for Mistral OCR via passthrough)

* feat(llm_passthrough_endpoints.py): support mistral passthrough

Closes https://github.com/BerriAI/litellm/issues/9051

* feat(llm_passthrough_endpoints.py): initial commit for adding vllm passthrough route

* feat(vllm/common_utils.py): add new vllm model info route

make it possible to use the vllm passthrough route via a factory function

* fix(llm_passthrough_endpoints.py): add all methods to vllm passthrough route

* fix: fix linting error

* fix: fix linting error

* fix: fix ruff check

* fix(proxy/_types.py): add new passthrough routes

* docs(config_settings.md): add mistral env vars to docs
Krish Dholakia 2025-04-14 22:06:33 -07:00 committed by GitHub
parent 8faf56922c
commit 9b0f871129
12 changed files with 450 additions and 176 deletions


@@ -6,6 +6,7 @@ Provider-specific Pass-Through Endpoints
Use litellm with Anthropic SDK, Vertex AI SDK, Cohere SDK, etc.
"""
import os
from typing import Optional
import httpx
@@ -43,6 +44,84 @@ def create_request_copy(request: Request):
    }
async def llm_passthrough_factory_proxy_route(
    custom_llm_provider: str,
    endpoint: str,
    request: Request,
    fastapi_response: Response,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Factory function for creating pass-through endpoints for LLM providers.
    """
    from litellm.types.utils import LlmProviders
    from litellm.utils import ProviderConfigManager

    provider_config = ProviderConfigManager.get_provider_model_info(
        provider=LlmProviders(custom_llm_provider),
        model=None,
    )
    if provider_config is None:
        raise HTTPException(
            status_code=404, detail=f"Provider {custom_llm_provider} not found"
        )

    base_target_url = provider_config.get_api_base()
    if base_target_url is None:
        raise HTTPException(
            status_code=404, detail=f"Provider {custom_llm_provider} api base not found"
        )

    encoded_endpoint = httpx.URL(endpoint).path
    # Ensure endpoint starts with '/' for proper URL construction
    if not encoded_endpoint.startswith("/"):
        encoded_endpoint = "/" + encoded_endpoint

    # Construct the full target URL using httpx
    base_url = httpx.URL(base_target_url)
    updated_url = base_url.copy_with(path=encoded_endpoint)

    # Resolve the provider API key via the passthrough credential router
    provider_api_key = passthrough_endpoint_router.get_credentials(
        custom_llm_provider=custom_llm_provider,
        region_name=None,
    )

    auth_headers = provider_config.validate_environment(
        headers={},
        model="",
        messages=[],
        optional_params={},
        litellm_params={},
        api_key=provider_api_key,
        api_base=base_target_url,
    )

    ## check for streaming
    is_streaming_request = False
    # a request is streaming when 'stream': true is set in the JSON body
    if request.method == "POST":
        _request_body = await request.json()
        if _request_body.get("stream"):
            is_streaming_request = True

    ## CREATE PASS-THROUGH
    endpoint_func = create_pass_through_route(
        endpoint=endpoint,
        target=str(updated_url),
        custom_headers=auth_headers,
    )  # dynamically construct pass-through endpoint based on incoming path
    received_value = await endpoint_func(
        request,
        fastapi_response,
        user_api_key_dict,
        stream=is_streaming_request,  # type: ignore
    )

    return received_value
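For anyone extending this, the factory only exercises two hooks on the provider's model-info config: `get_api_base()` and `validate_environment(...)`, both visible in the calls above. A minimal sketch of that contract, with every name beyond those two methods assumed rather than taken from the diff:

```python
import os
from typing import List, Optional


class ExampleProviderModelInfo:
    """Illustrative sketch only; not litellm's actual base class."""

    def get_api_base(self) -> Optional[str]:
        # The factory raises a 404 if this returns None.
        return os.getenv("EXAMPLE_PROVIDER_API_BASE")  # hypothetical env var

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[dict],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        # Return the auth headers the passthrough forwards upstream.
        if api_key is not None:
            headers["Authorization"] = f"Bearer {api_key}"
        return headers
```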
@router.api_route(
    "/gemini/{endpoint:path}",
    methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
@@ -162,6 +241,84 @@ async def cohere_proxy_route(
    return received_value
@router.api_route(
    "/vllm/{endpoint:path}",
    methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
    tags=["VLLM Pass-through", "pass-through"],
)
async def vllm_proxy_route(
    endpoint: str,
    request: Request,
    fastapi_response: Response,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    [Docs](https://docs.litellm.ai/docs/pass_through/vllm)
    """
    return await llm_passthrough_factory_proxy_route(
        endpoint=endpoint,
        request=request,
        fastapi_response=fastapi_response,
        user_api_key_dict=user_api_key_dict,
        custom_llm_provider="vllm",
    )
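A quick usage sketch of the new route, assuming a LiteLLM proxy on localhost:4000, a hypothetical proxy virtual key "sk-1234", and the vLLM api base configured via the provider's env var (assumed `VLLM_API_BASE` here) so `get_api_base()` can resolve it:

```python
import httpx

PROXY_BASE = "http://localhost:4000"  # hypothetical deployment
headers = {"Authorization": "Bearer sk-1234"}  # hypothetical virtual key

# All five HTTP methods are forwarded; this GET is proxied to
# {api_base}/v1/models on the upstream vLLM server.
resp = httpx.get(f"{PROXY_BASE}/vllm/v1/models", headers=headers)
print(resp.json())
```

POST bodies with `{"stream": true}` take the streaming branch shown in the factory above.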
@router.api_route(
    "/mistral/{endpoint:path}",
    methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
    tags=["Mistral Pass-through", "pass-through"],
)
async def mistral_proxy_route(
    endpoint: str,
    request: Request,
    fastapi_response: Response,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    [Docs](https://docs.litellm.ai/docs/anthropic_completion)
    """
    base_target_url = os.getenv("MISTRAL_API_BASE") or "https://api.mistral.ai"
    encoded_endpoint = httpx.URL(endpoint).path

    # Ensure endpoint starts with '/' for proper URL construction
    if not encoded_endpoint.startswith("/"):
        encoded_endpoint = "/" + encoded_endpoint

    # Construct the full target URL using httpx
    base_url = httpx.URL(base_target_url)
    updated_url = base_url.copy_with(path=encoded_endpoint)

    # Resolve the Mistral API key via the passthrough credential router
    mistral_api_key = passthrough_endpoint_router.get_credentials(
        custom_llm_provider="mistral",
        region_name=None,
    )

    ## check for streaming
    is_streaming_request = False
    # a request is streaming when 'stream': true is set in the JSON body
    if request.method == "POST":
        _request_body = await request.json()
        if _request_body.get("stream"):
            is_streaming_request = True

    ## CREATE PASS-THROUGH
    endpoint_func = create_pass_through_route(
        endpoint=endpoint,
        target=str(updated_url),
        custom_headers={"Authorization": "Bearer {}".format(mistral_api_key)},
    )  # dynamically construct pass-through endpoint based on incoming path
    received_value = await endpoint_func(
        request,
        fastapi_response,
        user_api_key_dict,
        stream=is_streaming_request,  # type: ignore
    )

    return received_value
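This is the route that enables the Mistral OCR use case from the PR title. A hedged example, assuming a LiteLLM proxy on localhost:4000 with `MISTRAL_API_KEY` set server-side, a hypothetical proxy virtual key "sk-1234", and a request body following Mistral's documented `/v1/ocr` schema at the time of writing:

```python
import httpx

resp = httpx.post(
    "http://localhost:4000/mistral/v1/ocr",  # forwarded to {MISTRAL_API_BASE}/v1/ocr
    headers={"Authorization": "Bearer sk-1234"},  # hypothetical virtual key
    json={
        "model": "mistral-ocr-latest",
        "document": {
            "type": "document_url",
            "document_url": "https://arxiv.org/pdf/2201.04234",  # example document
        },
    },
)
print(resp.json())
```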
@router.api_route(
    "/anthropic/{endpoint:path}",
    methods=["GET", "POST", "PUT", "DELETE", "PATCH"],