add vertex embeddings endpoints

2025-04-27 03:34:10 +00:00 · 2024-08-03 17:26:49 -07:00 · 2024-08-03 17:26:49 -07:00 · 01cd07fa75
commit 01cd07fa75
parent beb7f1b2c6
2 changed files with 41 additions and 1 deletions
--- a/litellm/llms/fine_tuning_apis/vertex_ai.py
+++ b/litellm/llms/fine_tuning_apis/vertex_ai.py
@@ -272,7 +272,8 @@ class VertexFineTuningAPI(VertexLLM):
            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs{request_route}"
        elif "generateContent" in request_route:
            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
-
+        elif "predict" in request_route:
            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
        if self.async_handler is None:
            raise ValueError("VertexAI Fine Tuning - async_handler is not initialized")
--- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
+++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
@@ -143,6 +143,45 @@ async def vertex_generate_content(
        raise exception_handler(e) from e
@router.post(
    "/vertex-ai/publishers/google/models/{model_id:path}:predict",
    dependencies=[Depends(user_api_key_auth)],
    tags=["Vertex AI endpoints"],
)
async def vertex_predict_endpoint(
    request: Request,
    fastapi_response: Response,
    model_id: str,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    # NOTE: the docstring is a raw string on purpose. The curl example ends its
    # lines with a backslash; in a regular string literal backslash-newline is a
    # line continuation, which would silently drop both the backslashes and the
    # line breaks from the rendered documentation.
    r"""
    Pass-through endpoint for the Vertex AI API `:predict` route.

    Use this for:
    - Embeddings API - Text Embedding, Multi Modal Embedding
    - Imagen API
    - Code Completion API

    Example Curl:
    ```
    curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-gecko@001:predict \
      -H "Content-Type: application/json" \
      -H "Authorization: Bearer sk-1234" \
      -d '{"instances":[{"content": "gm"}]}'
    ```

    Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#generative-ai-get-text-embedding-drest

    It uses the Vertex AI credentials on the proxy and forwards the request to the Vertex AI API.
    """
    # `model_id` is captured with the `:path` converter and spliced back into the
    # upstream route unchanged. `fastapi_response` and `user_api_key_dict` are not
    # read in this body; they stay in the signature so the route's interface and
    # its auth dependency are unchanged.
    try:
        return await execute_post_vertex_ai_request(
            request=request,
            route=f"/publishers/google/models/{model_id}:predict",
        )
    except Exception as e:
        # Route every failure through the shared handler, keeping the original
        # exception as the cause.
        raise exception_handler(e) from e
@router.post(
    "/vertex-ai/tuningJobs",
    dependencies=[Depends(user_api_key_auth)],