add native cachedContents endpoint

2025-04-26 19:24:27 +00:00 · 2024-08-08 15:32:07 -07:00 · 2024-08-08 15:32:07 -07:00 · b9e1d60357
commit b9e1d60357
parent a87ece0a01
2 changed files with 29 additions and 0 deletions
--- a/litellm/llms/fine_tuning_apis/vertex_ai.py
+++ b/litellm/llms/fine_tuning_apis/vertex_ai.py
@ -278,6 +278,8 @@ class VertexFineTuningAPI(VertexLLM):
            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
        elif "countTokens" in request_route:
            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
        elif "cachedContents" in request_route:
            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1beta1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
        else:
            raise ValueError(f"Unsupported Vertex AI request route: {request_route}")
        if self.async_handler is None:
--- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
+++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
@ -303,3 +303,30 @@ async def vertex_cancel_fine_tuning_job(
        return response
    except Exception as e:
        raise exception_handler(e) from e
@router.post(
    "/vertex-ai/cachedContents",
    dependencies=[Depends(user_api_key_auth)],
    tags=["Vertex AI endpoints"],
)
async def vertex_create_add_cached_content(
    request: Request,
    fastapi_response: Response,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Pass-through endpoint for the Vertex AI `/cachedContents` API.

    The incoming request is forwarded to the Vertex AI API using the
    Vertex AI credentials configured on the proxy.

    Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest
    """
    cached_contents_route = "/cachedContents"
    try:
        # Await inside the try so failures from the forwarded call are
        # converted by exception_handler before reaching the client.
        return await execute_post_vertex_ai_request(
            request=request,
            route=cached_contents_route,
        )
    except Exception as err:
        raise exception_handler(err) from err