diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py
index 2d3e8d1c38..f964e7bba2 100644
--- a/litellm/llms/fine_tuning_apis/vertex_ai.py
+++ b/litellm/llms/fine_tuning_apis/vertex_ai.py
@@ -272,7 +272,8 @@ class VertexFineTuningAPI(VertexLLM):
             url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs{request_route}"
         elif "generateContent" in request_route:
             url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
-
+        elif "predict" in request_route:
+            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
         if self.async_handler is None:
             raise ValueError("VertexAI Fine Tuning - async_handler is not initialized")
 
diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
index 7ef552508b..da63aa0250 100644
--- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
+++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
@@ -143,6 +143,48 @@ async def vertex_generate_content(
         raise exception_handler(e) from e
 
 
+@router.post(
+    "/vertex-ai/publishers/google/models/{model_id:path}:predict",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["Vertex AI endpoints"],
+)
+async def vertex_predict_endpoint(
+    request: Request,
+    fastapi_response: Response,
+    model_id: str,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    r"""
+    Pass-through endpoint for the Vertex AI `:predict` API.
+
+    Use this for:
+    - Embeddings API - Text Embedding, Multi Modal Embedding
+    - Imagen API
+    - Code Completion API
+
+    Example Curl:
+    ```
+    curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-gecko@001:predict \
+    -H "Content-Type: application/json" \
+    -H "Authorization: Bearer sk-1234" \
+    -d '{"instances":[{"content": "gm"}]}'
+    ```
+
+    Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#generative-ai-get-text-embedding-drest
+    The request is forwarded to the Vertex AI API using the Vertex AI credentials on the proxy.
+    """
+    # The docstring is raw so the curl example keeps its trailing backslashes
+    # and line breaks; a plain string treats backslash-newline as a continuation.
+    try:
+        response = await execute_post_vertex_ai_request(
+            request=request,
+            route=f"/publishers/google/models/{model_id}:predict",
+        )
+        return response
+    except Exception as e:
+        raise exception_handler(e) from e
+
+
 @router.post(
     "/vertex-ai/tuningJobs",
     dependencies=[Depends(user_api_key_auth)],