mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 03:34:10 +00:00
add vertex embeddings endpoints
This commit is contained in:
parent
beb7f1b2c6
commit
01cd07fa75
2 changed files with 41 additions and 1 deletions
|
@ -272,7 +272,8 @@ class VertexFineTuningAPI(VertexLLM):
|
||||||
url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs{request_route}"
|
url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs{request_route}"
|
||||||
elif "generateContent" in request_route:
|
elif "generateContent" in request_route:
|
||||||
url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
|
url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
|
||||||
|
elif "predict" in request_route:
|
||||||
|
url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
|
||||||
if self.async_handler is None:
|
if self.async_handler is None:
|
||||||
raise ValueError("VertexAI Fine Tuning - async_handler is not initialized")
|
raise ValueError("VertexAI Fine Tuning - async_handler is not initialized")
|
||||||
|
|
||||||
|
|
|
@ -143,6 +143,45 @@ async def vertex_generate_content(
|
||||||
raise exception_handler(e) from e
|
raise exception_handler(e) from e
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
    "/vertex-ai/publishers/google/models/{model_id:path}:predict",
    dependencies=[Depends(user_api_key_auth)],
    tags=["Vertex AI endpoints"],
)
async def vertex_predict_endpoint(
    request: Request,
    fastapi_response: Response,
    model_id: str,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Pass-through endpoint for the Vertex AI API `:predict` route.

    Use this for:
    - Embeddings API - Text Embedding, Multi Modal Embedding
    - Imagen API
    - Code Completion API

    Example Curl:
    ```
    curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-gecko@001:predict \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer sk-1234" \
    -d '{"instances":[{"content": "gm"}]}'
    ```

    Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#generative-ai-get-text-embedding-drest

    The proxy's Vertex AI credentials are used and the request is forwarded
    to the Vertex AI API.

    Args:
        request: Incoming request, handed unchanged to
            `execute_post_vertex_ai_request`.
        fastapi_response: Response object injected by FastAPI; not read by
            this handler.
        model_id: Model identifier captured from the URL path.
        user_api_key_dict: Result of the `user_api_key_auth` dependency; not
            read by this handler.

    Returns:
        Whatever `execute_post_vertex_ai_request` returns for the forwarded
        call.

    Raises:
        The object produced by `exception_handler` for any exception raised
        while forwarding, chained to the original exception.
    """
    # Rebuild the Vertex-side route from the model id captured in the proxy path.
    predict_route = f"/publishers/google/models/{model_id}:predict"
    try:
        return await execute_post_vertex_ai_request(
            request=request,
            route=predict_route,
        )
    except Exception as err:
        # Same error-translation pattern as the other Vertex pass-through handlers.
        raise exception_handler(err) from err
|
||||||
|
|
||||||
|
|
||||||
@router.post(
|
@router.post(
|
||||||
"/vertex-ai/tuningJobs",
|
"/vertex-ai/tuningJobs",
|
||||||
dependencies=[Depends(user_api_key_auth)],
|
dependencies=[Depends(user_api_key_auth)],
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue